diff options
Diffstat (limited to 'src')
50 files changed, 49352 insertions, 8 deletions
diff --git a/src/Makefile.am b/src/Makefile.am index abb03c3f..a7f219c1 100644 --- a/src/Makefile.am +++ b/src/Makefile.am @@ -35,6 +35,11 @@ intel_drv_ladir = @moduledir@/drivers intel_drv_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ -ldrm_intel ../uxa/libuxa.la legacy/liblegacy.la intel_drv_la_LIBADD += @PCIACCESS_LIBS@ +if SNA +SUBDIRS += sna +intel_drv_la_LIBADD += sna/libsna.la +endif + NULL:=# intel_drv_la_SOURCES = \ diff --git a/src/intel_module.c b/src/intel_module.c index 9468e72f..9b1da491 100644 --- a/src/intel_module.c +++ b/src/intel_module.c @@ -36,6 +36,7 @@ #include "intel.h" #include "intel_driver.h" #include "legacy/legacy.h" +#include "sna/sna_module.h" #include <xf86drmMode.h> @@ -320,22 +321,49 @@ static Bool intel_pci_probe(DriverPtr driver, scrn->name = INTEL_NAME; scrn->Probe = NULL; -#if KMS_ONLY - intel_init_scrn(scrn); -#else switch (DEVICE_ID(device)) { +#if !KMS_ONLY case PCI_CHIP_I810: case PCI_CHIP_I810_DC100: case PCI_CHIP_I810_E: case PCI_CHIP_I815: lg_i810_init(scrn); break; +#endif +#if SNA + case 0: +#if SNA_GEN3 + case PCI_CHIP_PINEVIEW_M: + case PCI_CHIP_PINEVIEW_G: + case PCI_CHIP_G33_G: + case PCI_CHIP_Q35_G: + case PCI_CHIP_Q33_G: +#endif +#if SNA_GEN5 + case PCI_CHIP_IRONLAKE_D_G: + case PCI_CHIP_IRONLAKE_M_G: +#endif +#if SNA_GEN6 + case PCI_CHIP_SANDYBRIDGE_GT1: + case PCI_CHIP_SANDYBRIDGE_GT2: + case PCI_CHIP_SANDYBRIDGE_GT2_PLUS: + case PCI_CHIP_SANDYBRIDGE_M_GT1: + case PCI_CHIP_SANDYBRIDGE_M_GT2: + case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS: + case PCI_CHIP_SANDYBRIDGE_S_GT: +#endif + sna_init_scrn(scrn); + break; +#endif default: +#if SNA_DEFAULT + sna_init_scrn(scrn); +#else intel_init_scrn(scrn); +#endif break; } -#endif } return scrn != NULL; } @@ -360,20 +388,46 @@ static XF86ModuleVersionInfo intel_version = { static const OptionInfoRec * intel_available_options(int chipid, int busid) { -#if KMS_ONLY - return intel_uxa_available_options(chipid, busid); -#else switch (chipid) { +#if !KMS_ONLY case PCI_CHIP_I810: case 
PCI_CHIP_I810_DC100: case PCI_CHIP_I810_E: case PCI_CHIP_I815: return lg_i810_available_options(chipid, busid); +#endif +#if SNA + case 0: +#if SNA_GEN3 + case PCI_CHIP_PINEVIEW_M: + case PCI_CHIP_PINEVIEW_G: + case PCI_CHIP_G33_G: + case PCI_CHIP_Q35_G: + case PCI_CHIP_Q33_G: +#endif +#if SNA_GEN5 + case PCI_CHIP_IRONLAKE_D_G: + case PCI_CHIP_IRONLAKE_M_G: +#endif +#if SNA_GEN6 + case PCI_CHIP_SANDYBRIDGE_GT1: + case PCI_CHIP_SANDYBRIDGE_GT2: + case PCI_CHIP_SANDYBRIDGE_GT2_PLUS: + case PCI_CHIP_SANDYBRIDGE_M_GT1: + case PCI_CHIP_SANDYBRIDGE_M_GT2: + case PCI_CHIP_SANDYBRIDGE_M_GT2_PLUS: + case PCI_CHIP_SANDYBRIDGE_S_GT: +#endif + return sna_available_options(chipid, busid); +#endif default: +#if SNA_DEFAULT + return sna_available_options(chipid, busid); +#else return intel_uxa_available_options(chipid, busid); - } #endif + } } static DriverRec intel = { diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am new file mode 100644 index 00000000..f65b281b --- /dev/null +++ b/src/sna/Makefile.am @@ -0,0 +1,115 @@ +# Copyright 2005 Adam Jackson. +# +# Permission is hereby granted, free of charge, to any person obtaining a +# copy of this software and associated documentation files (the "Software"), +# to deal in the Software without restriction, including without limitation +# on the rights to use, copy, modify, merge, publish, distribute, sub +# license, and/or sell copies of the Software, and to permit persons to whom +# the Software is furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice (including the next +# paragraph) shall be included in all copies or substantial portions of the +# Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. 
IN NO EVENT SHALL +# ADAM JACKSON BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER +# IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN +# CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +AM_CFLAGS = @CWARNFLAGS@ @XORG_CFLAGS@ @UDEV_CFLAGS@ @DRM_CFLAGS@ @DRI_CFLAGS@ \ + -I$(top_srcdir)/src -I$(top_srcdir)/uxa -I$(top_srcdir)/src/render_program + +noinst_LTLIBRARIES = libsna.la +libsna_la_LIBADD = @UDEV_LIBS@ -lm @DRM_LIBS@ + +NULL:=# + +libsna_la_SOURCES = \ + blt.c \ + kgem.c \ + kgem.h \ + sna.h \ + sna_accel.c \ + sna_blt.c \ + sna_composite.c \ + sna_damage.c \ + snd_damage.h \ + sna_display.c \ + sna_driver.c \ + sna_driver.h \ + sna_glyphs.c \ + sna_gradient.c \ + sna_io.c \ + sna_render.c \ + sna_render.h \ + sna_render_inline.h \ + sna_reg.h \ + sna_stream.c \ + sna_trapezoids.c \ + sna_tiling.c \ + sna_transform.c \ + sna_video.c \ + sna_video.h \ + sna_video_overlay.c \ + sna_video_textured.c \ + $(NULL) + +if SNA_GEN2 +libsna_la_SOURCES += \ + gen2_render.c \ + gen2_render.h \ + $(NULL) +endif +if SNA_GEN3 +libsna_la_SOURCES += \ + gen3_render.c \ + gen3_render.h \ + $(NULL) +endif +if SNA_GEN4 +libsna_la_SOURCES += \ + gen4_render.c \ + gen4_render.h \ + $(NULL) +endif +if SNA_GEN5 +libsna_la_SOURCES += \ + gen5_render.c \ + gen5_render.h \ + $(NULL) +endif +if SNA_GEN6 +libsna_la_SOURCES += \ + gen6_render.c \ + gen6_render.h \ + $(NULL) +endif + +if DRI +libsna_la_SOURCES += \ + sna_dri.c \ + $(NULL) +libsna_la_LIBADD += \ + $(DRI_LIBS) \ + $(NULL) +endif + +if XVMC +libsna_la_SOURCES += \ + sna_video_hwmc.h \ + sna_video_hwmc.c \ + $(NULL) +endif + +if DEBUG +libsna_la_SOURCES += \ + kgem_debug.c \ + kgem_debug.h \ + kgem_debug_gen3.c \ + kgem_debug_gen4.c \ + kgem_debug_gen5.c \ + kgem_debug_gen6.c \ + $(NULL) +endif diff --git a/src/sna/README b/src/sna/README new file mode 100644 index 00000000..fd847de3 --- /dev/null +++ b/src/sna/README @@ -0,0 +1,30 @@ +SandyBridge's New 
Acceleration +------------------------------ + +The guiding principle behind the design is to avoid GPU context switches. +On SandyBridge (and beyond), these are especially pernicious because the +RENDER and BLT engine are now on different rings and require +synchronisation of the various execution units when switching contexts. +They were not cheap on earlier generations, but with the increasing +complexity of the GPU, avoiding such serialisations is important. + +Furthermore, we try very hard to avoid migrating between the CPU and GPU. +Every pixmap (apart from temporary "scratch" surfaces which we intend to +use on the GPU) is created in system memory. All operations are then done +upon this shadow copy until we are forced to move it onto the GPU. Such +migration can only be first triggered by: setting the pixmap as the +scanout (we obviously need a GPU buffer here), using the pixmap as a DRI +buffer (the client expects to perform hardware acceleration and we do not +want to disappoint) and lastly using the pixmap as a RENDER target. This +last is chosen because when we know we are going to perform hardware +acceleration and will continue to do so without fallbacks, using the GPU +is much, much faster than the CPU. The heuristic I chose therefore was +that if the application uses RENDER, i.e. cairo, then it will only be +using those paths and not intermixing core drawing operations and so +unlikely to trigger a fallback. + +The complicating case is front-buffer rendering. So in order to accommodate +using RENDER on an application whilst running xterm without a composite +manager redirecting all the pixmaps to backing surfaces, we have to +perform damage tracking to avoid excess migration of portions of the +buffer. 
diff --git a/src/sna/blt.c b/src/sna/blt.c new file mode 100644 index 00000000..ac20372e --- /dev/null +++ b/src/sna/blt.c @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" + +#if DEBUG_BLT +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +void +memcpy_blt(const void *src, void *dst, int bpp, + uint16_t src_stride, uint16_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height) +{ + uint8_t *src_bytes; + uint8_t *dst_bytes; + + assert(width && height); + assert(bpp >= 8); + + DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d\n", + __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height, src_stride, dst_stride)); + + bpp /= 8; + width *= bpp; + + src_bytes = (uint8_t *)src + src_stride * src_y + src_x * bpp; + dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * bpp; + + if (width == src_stride && width == dst_stride) { + memcpy(dst_bytes, src_bytes, width * height); + return; + } + + do { + memcpy(dst_bytes, src_bytes, width); + src_bytes += src_stride; + dst_bytes += dst_stride; + } while (--height); +} diff --git a/src/sna/gen2_render.c b/src/sna/gen2_render.c new file mode 100644 index 00000000..896f7308 --- /dev/null +++ b/src/sna/gen2_render.c @@ -0,0 +1,1237 @@ +/* + * Copyright © 2006,2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@intel.com> + * Eric Anholt <eric@anholt.net> + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_reg.h" +#include "sna_render.h" +#include "sna_render_inline.h" + +#include "gen2_render.h" + +#if DEBUG_RENDER +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +#define OUT_BATCH(v) batch_emit(sna, v) +#define OUT_BATCH_F(v) batch_emit_float(sna, v) +#define OUT_VERTEX(v) batch_emit_float(sna, v) + +static const struct blendinfo { + Bool dst_alpha; + Bool src_alpha; + uint32_t src_blend; + uint32_t dst_blend; +} gen2_blend_op[] = { + /* Clear */ + {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ZERO}, + /* Src */ + {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ZERO}, + /* Dst */ + {0, 0, BLENDFACTOR_ZERO, BLENDFACTOR_ONE}, + /* Over */ + {0, 1, BLENDFACTOR_ONE, BLENDFACTOR_INV_SRC_ALPHA}, + /* OverReverse */ + {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ONE}, + /* In */ + {1, 0, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_ZERO}, + /* InReverse */ + {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_SRC_ALPHA}, + /* Out */ + {1, 0, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_ZERO}, + /* OutReverse */ + {0, 1, BLENDFACTOR_ZERO, BLENDFACTOR_INV_SRC_ALPHA}, + /* Atop */ + {1, 1, BLENDFACTOR_DST_ALPHA, BLENDFACTOR_INV_SRC_ALPHA}, + /* AtopReverse */ + {1, 1, BLENDFACTOR_INV_DST_ALPHA, BLENDFACTOR_SRC_ALPHA}, + /* Xor */ + {1, 1, BLENDFACTOR_INV_DST_ALPHA, 
BLENDFACTOR_INV_SRC_ALPHA}, + /* Add */ + {0, 0, BLENDFACTOR_ONE, BLENDFACTOR_ONE}, +}; + +static const struct formatinfo { + int fmt; + uint32_t card_fmt; +} i8xx_tex_formats[] = { + {PICT_a8, MAPSURF_8BIT | MT_8BIT_A8}, + {PICT_a8r8g8b8, MAPSURF_32BIT | MT_32BIT_ARGB8888}, + {PICT_a8b8g8r8, MAPSURF_32BIT | MT_32BIT_ABGR8888}, + {PICT_r5g6b5, MAPSURF_16BIT | MT_16BIT_RGB565}, + {PICT_a1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555}, + {PICT_a4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444}, +}, i85x_tex_formats[] = { + {PICT_x8r8g8b8, MAPSURF_32BIT | MT_32BIT_XRGB8888}, + {PICT_x8b8g8r8, MAPSURF_32BIT | MT_32BIT_XBGR8888}, +}; + +static inline uint32_t +gen2_buf_tiling(uint32_t tiling) +{ + uint32_t v = 0; + switch (tiling) { + case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y; + case I915_TILING_X: v |= BUF_3D_TILED_SURFACE; + case I915_TILING_NONE: break; + } + return v; +} + +static uint32_t +gen2_get_dst_format(uint32_t format) +{ +#define BIAS DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8) + switch (format) { + default: + assert(0); + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + return COLR_BUF_ARGB8888 | BIAS; + case PICT_r5g6b5: + return COLR_BUF_RGB565 | BIAS; + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + return COLR_BUF_ARGB1555 | BIAS; + case PICT_a8: + return COLR_BUF_8BIT | BIAS; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return COLR_BUF_ARGB4444 | BIAS; + } +#undef BIAS +} + +static Bool +gen2_check_dst_format(uint32_t format) +{ + switch (format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case PICT_r5g6b5: + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + case PICT_a8: + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return TRUE; + default: + return FALSE; + } +} + +static uint32_t +gen2_get_card_format(struct sna *sna, uint32_t format) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) { + if (i8xx_tex_formats[i].fmt == format) + return i8xx_tex_formats[i].card_fmt; + } + + if (!(IS_I830(sna) || IS_845G(sna))) { + for (i = 0; i < 
ARRAY_SIZE(i85x_tex_formats); i++) { + if (i85x_tex_formats[i].fmt == format) + return i85x_tex_formats[i].card_fmt; + } + } + + assert(0); + return 0; +} + +static Bool +gen2_check_card_format(struct sna *sna, uint32_t format) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(i8xx_tex_formats); i++) { + if (i8xx_tex_formats[i].fmt == format) + return TRUE; + } + + if (!(IS_I830(sna) || IS_845G(sna))) { + for (i = 0; i < ARRAY_SIZE(i85x_tex_formats); i++) { + if (i85x_tex_formats[i].fmt == format) + return TRUE; + } + } + + return FALSE; +} + +static uint32_t +gen2_sampler_tiling_bits(uint32_t tiling) +{ + uint32_t bits = 0; + switch (tiling) { + default: + assert(0); + case I915_TILING_Y: + bits |= TM0S1_TILE_WALK; + case I915_TILING_X: + bits |= TM0S1_TILED_SURFACE; + case I915_TILING_NONE: + break; + } + return bits; +} + +static Bool +gen2_check_filter(PicturePtr picture) +{ + switch (picture->filter) { + case PictFilterNearest: + case PictFilterBilinear: + return TRUE; + default: + return FALSE; + } +} + +static Bool +gen2_check_repeat(PicturePtr picture) +{ + if (!picture->repeat) + return TRUE; + + switch (picture->repeatType) { + case RepeatNone: + case RepeatNormal: + case RepeatPad: + case RepeatReflect: + return TRUE; + default: + return FALSE; + } +} + +static void +gen2_emit_texture(struct sna *sna, + const struct sna_composite_channel *channel, + int unit) +{ + uint32_t filter; + uint32_t wrap_mode; + uint32_t texcoordtype; + + if (channel->is_affine) + texcoordtype = TEXCOORDTYPE_CARTESIAN; + else + texcoordtype = TEXCOORDTYPE_HOMOGENEOUS; + + switch (channel->repeat) { + default: + assert(0); + case RepeatNone: + wrap_mode = TEXCOORDMODE_CLAMP_BORDER; + break; + case RepeatNormal: + wrap_mode = TEXCOORDMODE_WRAP; + break; + case RepeatPad: + wrap_mode = TEXCOORDMODE_CLAMP; + break; + case RepeatReflect: + wrap_mode = TEXCOORDMODE_MIRROR; + break; + } + + switch (channel->filter) { + default: + assert(0); + case PictFilterNearest: + filter = (FILTER_NEAREST 
<< TM0S3_MAG_FILTER_SHIFT | + FILTER_NEAREST << TM0S3_MIN_FILTER_SHIFT); + break; + case PictFilterBilinear: + filter = (FILTER_LINEAR << TM0S3_MAG_FILTER_SHIFT | + FILTER_LINEAR << TM0S3_MIN_FILTER_SHIFT); + break; + } + filter |= MIPFILTER_NONE << TM0S3_MIP_FILTER_SHIFT; + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | + LOAD_TEXTURE_MAP(unit) | 4); + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + channel->bo, + I915_GEM_DOMAIN_SAMPLER << 16, + 0)); + OUT_BATCH(((channel->height - 1) << TM0S1_HEIGHT_SHIFT) | + ((channel->width - 1) << TM0S1_WIDTH_SHIFT) | + gen2_get_card_format(sna, channel->pict_format) | + gen2_sampler_tiling_bits(channel->bo->tiling)); + OUT_BATCH((channel->bo->pitch / 4 - 1) << TM0S2_PITCH_SHIFT | TM0S2_MAP_2D); + OUT_BATCH(filter); + OUT_BATCH(0); /* default color */ + OUT_BATCH(_3DSTATE_MAP_COORD_SET_CMD | TEXCOORD_SET(unit) | + ENABLE_TEXCOORD_PARAMS | TEXCOORDS_ARE_NORMAL | + texcoordtype | + ENABLE_ADDR_V_CNTL | TEXCOORD_ADDR_V_MODE(wrap_mode) | + ENABLE_ADDR_U_CNTL | TEXCOORD_ADDR_U_MODE(wrap_mode)); + /* map texel stream */ + OUT_BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD); + if (unit == 0) + OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) | + TEXBIND_SET1(TEXCOORDSRC_KEEP) | + TEXBIND_SET2(TEXCOORDSRC_KEEP) | + TEXBIND_SET3(TEXCOORDSRC_KEEP)); + else + OUT_BATCH(TEXBIND_SET0(TEXCOORDSRC_VTXSET_0) | + TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | + TEXBIND_SET2(TEXCOORDSRC_KEEP) | + TEXBIND_SET3(TEXCOORDSRC_KEEP)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + (unit << 16) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(unit) | + ENABLE_TEX_STREAM_MAP_IDX | + TEX_STREAM_MAP_IDX(unit)); +} + +static void +gen2_get_blend_factors(const struct sna_composite_op *op, + uint32_t *c_out, + uint32_t *a_out) +{ + uint32_t cblend, ablend; + + /* If component alpha is active in the mask and the blend operation + * uses the source alpha, then we know we don't need the source + * value (otherwise we would have hit a 
fallback earlier), so we + * provide the source alpha (src.A * mask.X) as output color. + * Conversely, if CA is set and we don't need the source alpha, then + * we produce the source value (src.X * mask.X) and the source alpha + * is unused.. Otherwise, we provide the non-CA source value + * (src.X * mask.A). + * + * The PICT_FORMAT_RGB(pict) == 0 fixups are not needed on 855+'s a8 + * pictures, but we need to implement it for 830/845 and there's no + * harm done in leaving it in. + */ + cblend = + TB0C_LAST_STAGE | TB0C_RESULT_SCALE_1X | TB0C_OP_MODULE | + TB0C_OUTPUT_WRITE_CURRENT; + ablend = + TB0A_RESULT_SCALE_1X | TB0A_OP_MODULE | + TB0A_OUTPUT_WRITE_CURRENT; + + /* Get the source picture's channels into TBx_ARG1 */ + if ((op->has_component_alpha && gen2_blend_op[op->op].src_alpha) || + op->dst.format == PICT_a8) { + /* Producing source alpha value, so the first set of channels + * is src.A instead of src.X. We also do this if the destination + * is a8, in which case src.G is what's written, and the other + * channels are ignored. + */ + ablend |= TB0A_ARG1_SEL_TEXEL0; + cblend |= TB0C_ARG1_SEL_TEXEL0 | TB0C_ARG1_REPLICATE_ALPHA; + } else { + if (PICT_FORMAT_RGB(op->src.pict_format) != 0) + cblend |= TB0C_ARG1_SEL_TEXEL0; + else + cblend |= TB0C_ARG1_SEL_ONE | TB0C_ARG1_INVERT; /* 0.0 */ + ablend |= TB0A_ARG1_SEL_TEXEL0; + } + + if (op->mask.bo) { + cblend |= TB0C_ARG2_SEL_TEXEL1; + if (op->dst.format == PICT_a8 || op->has_component_alpha) + cblend |= TB0C_ARG2_REPLICATE_ALPHA; + ablend |= TB0A_ARG2_SEL_TEXEL1; + } else { + cblend |= TB0C_ARG2_SEL_ONE; + ablend |= TB0A_ARG2_SEL_ONE; + } + + *c_out = cblend; + *a_out = ablend; +} + +static uint32_t gen2_get_blend_cntl(int op, + Bool has_component_alpha, + uint32_t dst_format) +{ + uint32_t sblend, dblend; + + sblend = gen2_blend_op[op].src_blend; + dblend = gen2_blend_op[op].dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that + * we'll treat it as always 1. 
+ */ + if (PICT_FORMAT_A(dst_format) == 0 && gen2_blend_op[op].dst_alpha) { + if (sblend == BLENDFACTOR_DST_ALPHA) + sblend = BLENDFACTOR_ONE; + else if (sblend == BLENDFACTOR_INV_DST_ALPHA) + sblend = BLENDFACTOR_ZERO; + } + + /* If the source alpha is being used, then we should only be in a case + * where the source blend factor is 0, and the source blend value is + * the mask channels multiplied by the source picture's alpha. + */ + if (has_component_alpha && gen2_blend_op[op].src_alpha) { + if (dblend == BLENDFACTOR_SRC_ALPHA) + dblend = BLENDFACTOR_SRC_COLR; + else if (dblend == BLENDFACTOR_INV_SRC_ALPHA) + dblend = BLENDFACTOR_INV_SRC_COLR; + } + + return (sblend << S8_SRC_BLEND_FACTOR_SHIFT | + dblend << S8_DST_BLEND_FACTOR_SHIFT); +} + +static void gen2_emit_invariant(struct sna *sna) +{ + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(0)); + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(1)); + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(2)); + OUT_BATCH(_3DSTATE_MAP_CUBE | MAP_UNIT(3)); + + OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_DFLT_Z_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_FOG_MODE_CMD); + OUT_BATCH(FOGFUNC_ENABLE | + FOG_LINEAR_CONST | FOGSRC_INDEX_Z | ENABLE_FOG_DENSITY); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(0) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(0) | + ENABLE_TEX_STREAM_MAP_IDX | + TEX_STREAM_MAP_IDX(0)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(1) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(1) | + ENABLE_TEX_STREAM_MAP_IDX | + TEX_STREAM_MAP_IDX(1)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(2) | + DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(2) | + ENABLE_TEX_STREAM_MAP_IDX | + TEX_STREAM_MAP_IDX(2)); + OUT_BATCH(_3DSTATE_MAP_TEX_STREAM_CMD | + MAP_UNIT(3) | + 
DISABLE_TEX_STREAM_BUMP | + ENABLE_TEX_STREAM_COORD_SET | + TEX_STREAM_COORD_SET(3) | + ENABLE_TEX_STREAM_MAP_IDX | + TEX_STREAM_MAP_IDX(3)); + + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(0)); + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(1)); + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(2)); + OUT_BATCH(_3DSTATE_MAP_COORD_TRANSFORM); + OUT_BATCH(DISABLE_TEX_TRANSFORM | TEXTURE_SET(3)); + + OUT_BATCH(_3DSTATE_RASTER_RULES_CMD | + ENABLE_POINT_RASTER_RULE | + OGL_POINT_RASTER_RULE | + ENABLE_LINE_STRIP_PROVOKE_VRTX | + ENABLE_TRI_FAN_PROVOKE_VRTX | + ENABLE_TRI_STRIP_PROVOKE_VRTX | + LINE_STRIP_PROVOKE_VRTX(1) | + TRI_FAN_PROVOKE_VRTX(2) | TRI_STRIP_PROVOKE_VRTX(2)); + + OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT); + OUT_BATCH(_3DSTATE_SCISSOR_RECT_0_CMD); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_VERTEX_TRANSFORM); + OUT_BATCH(DISABLE_VIEWPORT_TRANSFORM | DISABLE_PERSPECTIVE_DIVIDE); + + OUT_BATCH(_3DSTATE_W_STATE_CMD); + OUT_BATCH(MAGIC_W_STATE_DWORD1); + OUT_BATCH_F(1.0); + + OUT_BATCH(_3DSTATE_COLOR_FACTOR_CMD); + OUT_BATCH(0x80808080); /* .5 required in alpha for GL_DOT3_RGBA_EXT */ + + OUT_BATCH(_3DSTATE_MAP_COORD_SETBIND_CMD); + OUT_BATCH(TEXBIND_SET3(TEXCOORDSRC_VTXSET_3) | + TEXBIND_SET2(TEXCOORDSRC_VTXSET_2) | + TEXBIND_SET1(TEXCOORDSRC_VTXSET_1) | + TEXBIND_SET0(TEXCOORDSRC_VTXSET_0)); + + /* copy from mesa */ + OUT_BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD | + DISABLE_INDPT_ALPHA_BLEND | + ENABLE_ALPHA_BLENDFUNC | ABLENDFUNC_ADD); + + OUT_BATCH(_3DSTATE_FOG_COLOR_CMD | + FOG_COLOR_RED(0) | FOG_COLOR_GREEN(0) | FOG_COLOR_BLUE(0)); + + OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD); + OUT_BATCH(0); + + OUT_BATCH(_3DSTATE_MODES_1_CMD | + ENABLE_COLR_BLND_FUNC | + BLENDFUNC_ADD | + ENABLE_SRC_BLND_FACTOR | + SRC_BLND_FACT(BLENDFACTOR_ONE) | + ENABLE_DST_BLND_FACTOR | DST_BLND_FACT(BLENDFACTOR_ZERO)); + 
OUT_BATCH(_3DSTATE_MODES_2_CMD | + ENABLE_GLOBAL_DEPTH_BIAS | + GLOBAL_DEPTH_BIAS(0) | + ENABLE_ALPHA_TEST_FUNC | + ALPHA_TEST_FUNC(0) | /* always */ + ALPHA_REF_VALUE(0)); + OUT_BATCH(_3DSTATE_MODES_3_CMD | + ENABLE_DEPTH_TEST_FUNC | + DEPTH_TEST_FUNC(0x2) | /* COMPAREFUNC_LESS */ + ENABLE_ALPHA_SHADE_MODE | ALPHA_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_FOG_SHADE_MODE | FOG_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_SPEC_SHADE_MODE | SPEC_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_COLOR_SHADE_MODE | COLOR_SHADE_MODE(SHADE_MODE_LINEAR) | + ENABLE_CULL_MODE | CULLMODE_NONE); + + OUT_BATCH(_3DSTATE_MODES_4_CMD | + ENABLE_LOGIC_OP_FUNC | LOGIC_OP_FUNC(LOGICOP_COPY) | + ENABLE_STENCIL_TEST_MASK | STENCIL_TEST_MASK(0xff) | + ENABLE_STENCIL_WRITE_MASK | STENCIL_WRITE_MASK(0xff)); + + OUT_BATCH(_3DSTATE_STENCIL_TEST_CMD | + ENABLE_STENCIL_PARMS | + STENCIL_FAIL_OP(0) | /* STENCILOP_KEEP */ + STENCIL_PASS_DEPTH_FAIL_OP(0) | /* STENCILOP_KEEP */ + STENCIL_PASS_DEPTH_PASS_OP(0) | /* STENCILOP_KEEP */ + ENABLE_STENCIL_TEST_FUNC | STENCIL_TEST_FUNC(0) | /* COMPAREFUNC_ALWAYS */ + ENABLE_STENCIL_REF_VALUE | STENCIL_REF_VALUE(0)); + + OUT_BATCH(_3DSTATE_MODES_5_CMD | + FLUSH_TEXTURE_CACHE | + ENABLE_SPRITE_POINT_TEX | SPRITE_POINT_TEX_OFF | + ENABLE_FIXED_LINE_WIDTH | FIXED_LINE_WIDTH(0x2) | /* 1.0 */ + ENABLE_FIXED_POINT_WIDTH | FIXED_POINT_WIDTH(1)); + + OUT_BATCH(_3DSTATE_ENABLES_1_CMD | + DISABLE_LOGIC_OP | + DISABLE_STENCIL_TEST | + DISABLE_DEPTH_BIAS | + DISABLE_SPEC_ADD | + DISABLE_FOG | + DISABLE_ALPHA_TEST | + ENABLE_COLOR_BLEND | + DISABLE_DEPTH_TEST); + OUT_BATCH(_3DSTATE_ENABLES_2_CMD | + DISABLE_STENCIL_WRITE | + ENABLE_TEX_CACHE | + DISABLE_DITHER | + ENABLE_COLOR_MASK | + ENABLE_COLOR_WRITE | + DISABLE_DEPTH_WRITE); + + OUT_BATCH(_3DSTATE_STIPPLE); + + /* Set default blend state */ + OUT_BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) | + TEXPIPE_COLOR | + ENABLE_TEXOUTPUT_WRT_SEL | TEXOP_OUTPUT_CURRENT | + DISABLE_TEX_CNTRL_STAGE | + TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | + 
TEXOP_LAST_STAGE | TEXBLENDOP_ARG1); + OUT_BATCH(_3DSTATE_MAP_BLEND_OP_CMD(0) | + TEXPIPE_ALPHA | + ENABLE_TEXOUTPUT_WRT_SEL | TEXOP_OUTPUT_CURRENT | + TEXOP_SCALE_1X | TEXOP_MODIFY_PARMS | TEXBLENDOP_ARG1); + OUT_BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) | + TEXPIPE_COLOR | + TEXBLEND_ARG1 | + TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_DIFFUSE); + OUT_BATCH(_3DSTATE_MAP_BLEND_ARG_CMD(0) | + TEXPIPE_ALPHA | + TEXBLEND_ARG1 | + TEXBLENDARG_MODIFY_PARMS | TEXBLENDARG_DIFFUSE); + + OUT_BATCH(_3DSTATE_AA_CMD | + AA_LINE_ECAAR_WIDTH_ENABLE | + AA_LINE_ECAAR_WIDTH_1_0 | + AA_LINE_REGION_WIDTH_ENABLE | + AA_LINE_REGION_WIDTH_1_0 | AA_LINE_DISABLE); + + sna->render_state.gen2.need_invariant = FALSE; +} + +static void +gen2_get_batch(struct sna *sna, + const struct sna_composite_op *op) +{ + kgem_set_mode(&sna->kgem, KGEM_RENDER); + + if (!kgem_check_batch(&sna->kgem, 50)) { + DBG(("%s: flushing batch: size %d > %d\n", + __FUNCTION__, 50, + sna->kgem.surface-sna->kgem.nbatch)); + kgem_submit(&sna->kgem); + } + + if (sna->kgem.nreloc + 3 > KGEM_RELOC_SIZE(&sna->kgem)) { + DBG(("%s: flushing batch: reloc %d >= %d\n", + __FUNCTION__, + sna->kgem.nreloc + 3, + (int)KGEM_RELOC_SIZE(&sna->kgem))); + kgem_submit(&sna->kgem); + } + + if (sna->kgem.nexec + 3 > KGEM_EXEC_SIZE(&sna->kgem)) { + DBG(("%s: flushing batch: exec %d >= %d\n", + __FUNCTION__, + sna->kgem.nexec + 1, + (int)KGEM_EXEC_SIZE(&sna->kgem))); + kgem_submit(&sna->kgem); + } + + if (sna->render_state.gen2.need_invariant) + gen2_emit_invariant(sna); +} + +static void gen2_emit_composite_state(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t texcoordfmt; + uint32_t cblend, ablend; + + gen2_get_batch(sna, op); + + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(BUF_3D_ID_COLOR_BACK | + gen2_buf_tiling(op->dst.bo->tiling) | + BUF_3D_PITCH(op->dst.bo->pitch)); + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + op->dst.bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER, + 0)); + + 
OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(gen2_get_dst_format(op->dst.format)); + + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(0); + OUT_BATCH(0); /* ymin, xmin */ + OUT_BATCH(DRAW_YMAX(op->dst.height - 1) | + DRAW_XMAX(op->dst.width - 1)); + OUT_BATCH(0); /* yorig, xorig */ + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(2) | I1_LOAD_S(3) | I1_LOAD_S(8) | 2); + OUT_BATCH((1 + (op->mask.bo != NULL)) << 12); + OUT_BATCH(S3_CULLMODE_NONE | S3_VERTEXHAS_XY); + OUT_BATCH(S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD | + gen2_get_blend_cntl(op->op, + op->has_component_alpha, + op->dst.format) | + S8_ENABLE_COLOR_BUFFER_WRITE); + + OUT_BATCH(_3DSTATE_INDPT_ALPHA_BLEND_CMD | DISABLE_INDPT_ALPHA_BLEND); + + gen2_get_blend_factors(op, &cblend, &ablend); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | + LOAD_TEXTURE_BLEND_STAGE(0) | 1); + OUT_BATCH(cblend); + OUT_BATCH(ablend); + + OUT_BATCH(_3DSTATE_ENABLES_1_CMD | DISABLE_LOGIC_OP | + DISABLE_STENCIL_TEST | DISABLE_DEPTH_BIAS | + DISABLE_SPEC_ADD | DISABLE_FOG | DISABLE_ALPHA_TEST | + ENABLE_COLOR_BLEND | DISABLE_DEPTH_TEST); + /* We have to explicitly say we don't want write disabled */ + OUT_BATCH(_3DSTATE_ENABLES_2_CMD | ENABLE_COLOR_MASK | + DISABLE_STENCIL_WRITE | ENABLE_TEX_CACHE | + DISABLE_DITHER | ENABLE_COLOR_WRITE | DISABLE_DEPTH_WRITE); + + texcoordfmt = 0; + if (op->src.is_affine) + texcoordfmt |= TEXCOORDFMT_2D << 0; + else + texcoordfmt |= TEXCOORDFMT_3D << 0; + if (op->mask.bo) { + if (op->mask.is_affine) + texcoordfmt |= TEXCOORDFMT_2D << 2; + else + texcoordfmt |= TEXCOORDFMT_3D << 2; + } + OUT_BATCH(_3DSTATE_VERTEX_FORMAT_2_CMD | texcoordfmt); + + gen2_emit_texture(sna, &op->src, 0); + if (op->mask.bo) + gen2_emit_texture(sna, &op->mask, 1); +} + +static inline void +gen2_emit_composite_dstcoord(struct sna *sna, int dstX, int dstY) +{ + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); +} + +static void +gen2_emit_composite_texcoord(struct sna *sna, + const struct sna_composite_channel *channel, + 
int16_t x, int16_t y) +{ + float s = 0, t = 0, w = 1; + + x += channel->offset[0]; + y += channel->offset[1]; + + if (channel->is_affine) { + sna_get_transformed_coordinates(x, y, + channel->transform, + &s, &t); + OUT_VERTEX(s * channel->scale[0]); + OUT_VERTEX(t * channel->scale[1]); + } else { + sna_get_transformed_coordinates_3d(x, y, + channel->transform, + &s, &t, &w); + OUT_VERTEX(s * channel->scale[0]); + OUT_VERTEX(t * channel->scale[1]); + OUT_VERTEX(w); + } +} + +static void +gen2_emit_composite_vertex(struct sna *sna, + const struct sna_composite_op *op, + int16_t srcX, int16_t srcY, + int16_t mskX, int16_t mskY, + int16_t dstX, int16_t dstY) +{ + gen2_emit_composite_dstcoord(sna, dstX, dstY); + gen2_emit_composite_texcoord(sna, &op->src, srcX, srcY); + gen2_emit_composite_texcoord(sna, &op->mask, mskX, mskY); +} + +static void +gen2_emit_composite_primitive(struct sna *sna, + const struct sna_composite_op *op, + int16_t srcX, int16_t srcY, + int16_t mskX, int16_t mskY, + int16_t dstX, int16_t dstY, + int16_t w, int16_t h) +{ + dstX += op->dst.x; + dstY += op->dst.y; + + gen2_emit_composite_vertex(sna, op, + srcX + w, srcY + h, + mskX + w, mskY + h, + dstX + w, dstY + h); + gen2_emit_composite_vertex(sna, op, + srcX, srcY + h, + mskX, mskY + h, + dstX, dstY + h); + gen2_emit_composite_vertex(sna, op, + srcX, srcY, + mskX, mskY, + dstX, dstY); +} + +static void gen2_magic_ca_pass(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t ablend, cblend; + + if (!op->need_magic_ca_pass) + return; + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(8) | 2); + OUT_BATCH(S8_ENABLE_COLOR_BLEND | S8_BLENDFUNC_ADD | + gen2_get_blend_cntl(PictOpAdd, + op->has_component_alpha, + op->dst.format) | + S8_ENABLE_COLOR_BUFFER_WRITE); + + gen2_get_blend_factors(op, &cblend, &ablend); + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_2 | + LOAD_TEXTURE_BLEND_STAGE(0) | 1); + OUT_BATCH(cblend); + OUT_BATCH(ablend); + + memcpy(sna->kgem.batch + sna->kgem.nbatch, + 
sna->kgem.batch + sna->render_state.gen2.vertex_offset, + (1 + 3*sna->render.vertex_index)*sizeof(uint32_t)); + sna->kgem.nbatch += 1 + 3*sna->render.vertex_index; +} + +static void gen2_vertex_flush(struct sna *sna) +{ + if (sna->render.vertex_index == 0) + return; + + sna->kgem.batch[sna->render_state.gen2.vertex_offset] |= + sna->render.vertex_index - 1; + + if (sna->render.op) + gen2_magic_ca_pass(sna, sna->render.op); + + sna->render_state.gen2.vertex_offset = 0; + sna->render.vertex_index = 0; +} + +inline static int gen2_get_rectangles(struct sna *sna, + const const struct sna_composite_op *op, + int want) +{ + struct gen2_render_state *state = &sna->render_state.gen2; + int rem = batch_space(sna), size, need; + + need = 0; + size = 3*op->floats_per_vertex; + if (op->need_magic_ca_pass) + need += 5, size *= 2; + + need += size; + if (state->vertex_offset == 0) + need += 2; + + if (rem < need) + return 0; + + if (state->vertex_offset == 0) { + state->vertex_offset = sna->kgem.nbatch; + OUT_BATCH(PRIM3D_INLINE | PRIM3D_RECTLIST); + rem--; + } + + if (want * size > rem) + want = rem / size; + + sna->render.vertex_index += 3*want; + return want; +} + +fastcall static void +gen2_render_composite_blt(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + if (!gen2_get_rectangles(sna, op, 1)) { + gen2_emit_composite_state(sna, op); + gen2_get_rectangles(sna, op, 1); + } + + gen2_emit_composite_primitive(sna, op, + r->src.x, r->src.y, + r->mask.x, r->mask.y, + r->dst.x, r->dst.y, + r->width, r->height); +} + +static void +gen2_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + do { + int nbox_this_time; + + nbox_this_time = gen2_get_rectangles(sna, op, nbox); + if (nbox_this_time == 0) { + gen2_emit_composite_state(sna, op); + nbox_this_time = gen2_get_rectangles(sna, op, nbox); + } + nbox -= nbox_this_time; + + do { + gen2_emit_composite_primitive(sna, op, 
						      box->x1, box->y1,
						      box->x1, box->y1,
						      box->x1, box->y1,
						      box->x2 - box->x1,
						      box->y2 - box->y1);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

/* Finish a composite: flush pending vertices, drop the bo references
 * taken by gen2_render_composite() and undo any render-target
 * redirection. */
static void gen2_render_composite_done(struct sna *sna,
				       const struct sna_composite_op *op)
{
	gen2_vertex_flush(sna);
	sna->render.op = NULL;
	_kgem_set_mode(&sna->kgem, KGEM_RENDER);

	sna_render_composite_redirect_done(sna, op);

	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);
	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
}

/* Set up 'channel' as a repeating 1x1 a8r8g8b8 solid-colour source.
 * Returns FALSE if the solid bo could not be obtained. */
static Bool
gen2_composite_solid_init(struct sna *sna,
			  struct sna_composite_channel *channel,
			  uint32_t color)
{
	channel->filter = PictFilterNearest;
	channel->repeat = RepeatNormal;
	channel->is_affine = TRUE;
	channel->is_solid = TRUE;
	channel->transform = NULL;
	channel->width = 1;
	channel->height = 1;
	channel->pict_format = PICT_a8r8g8b8;

	channel->bo = sna_render_get_solid(sna, color);

	channel->scale[0] = channel->scale[1] = 1;
	channel->offset[0] = channel->offset[1] = 0;
	return channel->bo != NULL;
}

/* Resolve a Picture into a sampler-ready channel, diverting through
 * the fixup/convert/extract helpers when it cannot be sampled
 * directly.  Return convention (see callers' switch): -1 failure,
 * 0 substitute a solid, 1 success. */
static int
gen2_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_channel *channel,
		       int x, int y,
		       int w, int h,
		       int dst_x, int dst_y)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->is_solid = FALSE;
	channel->card_format = -1;

	if (sna_picture_is_solid(picture, &color))
		return gen2_composite_solid_init(sna, channel, color);

	/* Source-only pictures (gradients etc.) must be rendered to a
	 * temporary surface first. */
	if (picture->pDrawable == NULL)
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen2_check_repeat(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen2_check_filter(picture))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? 
			  picture->repeatType : RepeatNone;
	channel->filter = picture->filter;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	/* A pure integer translation can be folded into the sample
	 * offsets, letting us drop the transform and filtering. */
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;
	} else
		channel->transform = picture->transform;

	if (!gen2_check_card_format(sna, picture->format))
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y);

	channel->pict_format = picture->format;
	/* NOTE(review): 8192 presumably is the sampler's maximum surface
	 * dimension here -- larger pixmaps are sampled via an extracted
	 * sub-surface; confirm against the gen2 programming docs. */
	if (pixmap->drawable.width > 8192 || pixmap->drawable.height > 8192)
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

/* Resolve the destination Picture into a GPU bo, recording the
 * drawable->pixmap translation and hooking up damage tracking when
 * the pixmap is not GPU-only. */
static Bool
gen2_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst)
{
	struct sna_pixmap *priv;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;

	priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
	if (priv == NULL)
		return FALSE;

	op->dst.bo = priv->gpu_bo;
	if (!priv->gpu_only)
		op->damage = &priv->gpu_damage;

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);
	return TRUE;
}

/* Heuristic: TRUE if the BLT engine looks preferable to the 3D pipe
 * for this source/destination combination. */
static Bool
try_blt(struct sna *sna,
	PicturePtr dst,
	PicturePtr source,
	int width, int height)
{
	uint32_t color;

	if (sna->kgem.mode == KGEM_BLT) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		return TRUE;
	}

	/* 2048 is the per-axis limit of this 3D pipe (cf. max_3d_size). */
	if (width > 2048 || height > 2048) {
		DBG(("%s: operation too large for 3D pipe 
(%d, %d)\n", + __FUNCTION__, width, height)); + return TRUE; + } + + /* If it is a solid, try to use the BLT paths */ + if (sna_picture_is_solid(source, &color)) + return TRUE; + + if (!source->pDrawable) + return FALSE; + + return is_cpu(source->pDrawable); +} + +static Bool +gen2_render_composite(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t mask_x, int16_t mask_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp) +{ + DBG(("%s()\n", __FUNCTION__)); + + /* Try to use the BLT engine unless it implies a + * 3D -> 2D context switch. + */ + if (mask == NULL && + try_blt(sna, dst, src, width, height) && + sna_blt_composite(sna, + op, src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, + tmp)) + return TRUE; + + if (op >= ARRAY_SIZE(gen2_blend_op)) { + DBG(("%s: fallback due to unhandled blend op: %d\n", + __FUNCTION__, op)); + return FALSE; + } + + if (!gen2_check_dst_format(dst->format)) { + DBG(("%s: fallback due to unhandled dst format: %x\n", + __FUNCTION__, dst->format)); + return FALSE; + } + + if (need_tiling(sna, width, height)) + return sna_tiling_composite(sna, + op, src, mask, dst, + src_x, src_y, + mask_x, mask_y, + dst_x, dst_y, + width, height, + tmp); + + memset(&tmp->u.gen2, 0, sizeof(tmp->u.gen2)); + + if (!gen2_composite_set_target(sna, tmp, dst)) { + DBG(("%s: unable to set render target\n", + __FUNCTION__)); + return FALSE; + } + + tmp->op = op; + if (tmp->dst.width > 2048 || + tmp->dst.height > 2048 || + tmp->dst.bo->pitch > 8192) { + if (!sna_render_composite_redirect(sna, tmp, + dst_x, dst_y, width, height)) + return FALSE; + } + + switch (gen2_composite_picture(sna, src, &tmp->src, + src_x, src_y, + width, height, + dst_x, dst_y)) { + case -1: + goto cleanup_dst; + case 0: + gen2_composite_solid_init(sna, &tmp->src, 0); + case 1: + break; + } + + if (mask) { + switch (gen2_composite_picture(sna, mask, 
&tmp->mask, + mask_x, mask_y, + width, height, + dst_x, dst_y)) { + case -1: + goto cleanup_src; + case 0: + gen2_composite_solid_init(sna, &tmp->mask, 0); + case 1: + break; + } + + if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { + /* Check if it's component alpha that relies on a source alpha + * and on the source value. We can only get one of those + * into the single source value that we get to blend with. + */ + tmp->has_component_alpha = TRUE; + if (gen2_blend_op[op].src_alpha && + (gen2_blend_op[op].src_blend != BLENDFACTOR_ZERO)) { + if (op != PictOpOver) + return FALSE; + + tmp->need_magic_ca_pass = TRUE; + tmp->op = PictOpOutReverse; + } + } + } + + tmp->blt = gen2_render_composite_blt; + tmp->boxes = gen2_render_composite_boxes; + tmp->done = gen2_render_composite_done; + + if (!kgem_check_bo(&sna->kgem, tmp->dst.bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, tmp->src.bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, tmp->mask.bo)) + kgem_submit(&sna->kgem); + + if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo)) + kgem_emit_flush(&sna->kgem); + + gen2_emit_composite_state(sna, tmp); + + sna->render.op = tmp; + return TRUE; + +cleanup_src: + if (tmp->src.bo) + kgem_bo_destroy(&sna->kgem, tmp->src.bo); +cleanup_dst: + if (tmp->redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->dst.bo); + return FALSE; +} + +static void +gen2_render_reset(struct sna *sna) +{ + sna->render_state.gen2.need_invariant = TRUE; + sna->render_state.gen2.vertex_offset = 0; +} + +static void +gen2_render_flush(struct sna *sna) +{ + gen2_vertex_flush(sna); +} + +static void +gen2_render_context_switch(struct sna *sna, + int new_mode) +{ +} + +static void +gen2_render_fini(struct sna *sna) +{ +} + +Bool gen2_render_init(struct sna *sna) +{ + struct sna_render *render = &sna->render; + + gen2_render_reset(sna); + + /* Use the BLT (and overlay) for everything except when forced to + * use the texture combiners. 
+ */ + render->composite = gen2_render_composite; + + /* XXX Y-tiling copies */ + + render->reset = gen2_render_reset; + render->flush = gen2_render_flush; + render->context_switch = gen2_render_context_switch; + render->fini = gen2_render_fini; + + render->max_3d_size = 2048; + return TRUE; +} diff --git a/src/sna/gen2_render.h b/src/sna/gen2_render.h new file mode 100644 index 00000000..945cd846 --- /dev/null +++ b/src/sna/gen2_render.h @@ -0,0 +1,785 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef GEN2_RENDER_H +#define GEN2_RENDER_H + +#define CMD_3D (0x3<<29) + +#define PRIM3D_INLINE (CMD_3D | (0x1f<<24)) +#define PRIM3D_TRILIST (0x0<<18) +#define PRIM3D_TRISTRIP (0x1<<18) +#define PRIM3D_TRISTRIP_RVRSE (0x2<<18) +#define PRIM3D_TRIFAN (0x3<<18) +#define PRIM3D_POLY (0x4<<18) +#define PRIM3D_LINELIST (0x5<<18) +#define PRIM3D_LINESTRIP (0x6<<18) +#define PRIM3D_RECTLIST (0x7<<18) +#define PRIM3D_POINTLIST (0x8<<18) +#define PRIM3D_DIB (0x9<<18) +#define PRIM3D_CLEAR_RECT (0xa<<18) +#define PRIM3D_ZONE_INIT (0xd<<18) +#define PRIM3D_MASK (0x1f<<18) + +#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24)) +#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16) +#define AA_LINE_ECAAR_WIDTH_0_5 0 +#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14) +#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14) +#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14) +#define AA_LINE_REGION_WIDTH_ENABLE (1<<8) +#define AA_LINE_REGION_WIDTH_0_5 0 +#define AA_LINE_REGION_WIDTH_1_0 (1<<6) +#define AA_LINE_REGION_WIDTH_2_0 (2<<6) +#define AA_LINE_REGION_WIDTH_4_0 (3<<6) +#define AA_LINE_ENABLE ((1<<1) | 1) +#define AA_LINE_DISABLE (1<<1) + +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +#define BUF_3D_PITCH(x) (((x)/4)<<2) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + +#define _3DSTATE_COLOR_FACTOR_CMD (CMD_3D | (0x1d<<24) | (0x1<<16)) + +#define _3DSTATE_COLOR_FACTOR_N_CMD(stage) (CMD_3D | (0x1d<<24) | \ + ((0x90+(stage))<<16)) + +#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) + +#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16)) + +#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16)) + +#define _3DSTATE_DFLT_Z_CMD (CMD_3D | 
(0x1d<<24) | (0x98<<16)) + +#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) +/* Dword 1 */ +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) +#define COLOR_4_2_2_CHNL_WRT_ALL 0 +#define COLOR_4_2_2_CHNL_WRT_Y (1<<12) +#define COLOR_4_2_2_CHNL_WRT_CR (2<<12) +#define COLOR_4_2_2_CHNL_WRT_CB (3<<12) +#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12) +#define COLR_BUF_8BIT 0 +#define COLR_BUF_RGB555 (1<<8) +#define COLR_BUF_RGB565 (2<<8) +#define COLR_BUF_ARGB8888 (3<<8) +#define COLR_BUF_ARGB4444 (8<<8) +#define COLR_BUF_ARGB1555 (9<<8) +#define DEPTH_IS_Z 0 +#define DEPTH_IS_W (1<<6) +#define DEPTH_FRMT_16_FIXED 0 +#define DEPTH_FRMT_16_FLOAT (1<<2) +#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) +#define DEPTH_FRMT_24_FLOAT_8_OTHER (3<<2) +#define VERT_LINE_STRIDE_1 (1<<1) +#define VERT_LINE_STRIDE_0 0 +#define VERT_LINE_STRIDE_OFS_1 1 +#define VERT_LINE_STRIDE_OFS_0 0 + +#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3) +/* Dword 1 */ +#define DRAW_RECT_DIS_DEPTH_OFS (1<<30) +#define DRAW_DITHER_OFS_X(x) ((x)<<26) +#define DRAW_DITHER_OFS_Y(x) ((x)<<24) +/* Dword 2 */ +#define DRAW_YMIN(x) ((x)<<16) +#define DRAW_XMIN(x) (x) +/* Dword 3 */ +#define DRAW_YMAX(x) ((x)<<16) +#define DRAW_XMAX(x) (x) +/* Dword 4 */ +#define DRAW_YORG(x) ((x)<<16) +#define DRAW_XORG(x) (x) + +#define _3DSTATE_ENABLES_1_CMD (CMD_3D|(0x3<<24)) +#define ENABLE_LOGIC_OP_MASK ((1<<23)|(1<<22)) +#define ENABLE_LOGIC_OP ((1<<23)|(1<<22)) +#define DISABLE_LOGIC_OP (1<<23) +#define ENABLE_STENCIL_TEST ((1<<21)|(1<<20)) +#define DISABLE_STENCIL_TEST (1<<21) +#define ENABLE_DEPTH_BIAS ((1<<11)|(1<<10)) +#define DISABLE_DEPTH_BIAS (1<<11) +#define ENABLE_SPEC_ADD_MASK ((1<<9)|(1<<8)) +#define ENABLE_SPEC_ADD ((1<<9)|(1<<8)) +#define DISABLE_SPEC_ADD (1<<9) +#define ENABLE_DIS_FOG_MASK ((1<<7)|(1<<6)) +#define ENABLE_FOG ((1<<7)|(1<<6)) +#define DISABLE_FOG (1<<7) +#define ENABLE_DIS_ALPHA_TEST_MASK ((1<<5)|(1<<4)) +#define ENABLE_ALPHA_TEST 
((1<<5)|(1<<4)) +#define DISABLE_ALPHA_TEST (1<<5) +#define ENABLE_DIS_CBLEND_MASK ((1<<3)|(1<<2)) +#define ENABLE_COLOR_BLEND ((1<<3)|(1<<2)) +#define DISABLE_COLOR_BLEND (1<<3) +#define ENABLE_DIS_DEPTH_TEST_MASK ((1<<1)|1) +#define ENABLE_DEPTH_TEST ((1<<1)|1) +#define DISABLE_DEPTH_TEST (1<<1) + +/* _3DSTATE_ENABLES_2, p138 */ +#define _3DSTATE_ENABLES_2_CMD (CMD_3D|(0x4<<24)) +#define ENABLE_STENCIL_WRITE ((1<<21)|(1<<20)) +#define DISABLE_STENCIL_WRITE (1<<21) +#define ENABLE_TEX_CACHE ((1<<17)|(1<<16)) +#define DISABLE_TEX_CACHE (1<<17) +#define ENABLE_DITHER ((1<<9)|(1<<8)) +#define DISABLE_DITHER (1<<9) +#define ENABLE_COLOR_MASK (1<<10) +#define WRITEMASK_ALPHA (1<<7) +#define WRITEMASK_ALPHA_SHIFT 7 +#define WRITEMASK_RED (1<<6) +#define WRITEMASK_RED_SHIFT 6 +#define WRITEMASK_GREEN (1<<5) +#define WRITEMASK_GREEN_SHIFT 5 +#define WRITEMASK_BLUE (1<<4) +#define WRITEMASK_BLUE_SHIFT 4 +#define WRITEMASK_MASK ((1<<4)|(1<<5)|(1<<6)|(1<<7)) +#define ENABLE_COLOR_WRITE ((1<<3)|(1<<2)) +#define DISABLE_COLOR_WRITE (1<<3) +#define ENABLE_DIS_DEPTH_WRITE_MASK 0x3 +#define ENABLE_DEPTH_WRITE ((1<<1)|1) +#define DISABLE_DEPTH_WRITE (1<<1) + +/* _3DSTATE_FOG_COLOR, p139 */ +#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24)) +#define FOG_COLOR_RED(x) ((x)<<16) +#define FOG_COLOR_GREEN(x) ((x)<<8) +#define FOG_COLOR_BLUE(x) (x) + +/* _3DSTATE_FOG_MODE, p140 */ +#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2) +/* Dword 1 */ +#define FOGFUNC_ENABLE (1<<31) +#define FOGFUNC_VERTEX 0 +#define FOGFUNC_PIXEL_EXP (1<<28) +#define FOGFUNC_PIXEL_EXP2 (2<<28) +#define FOGFUNC_PIXEL_LINEAR (3<<28) +#define FOGSRC_INDEX_Z (1<<27) +#define FOGSRC_INDEX_W ((1<<27)|(1<<25)) +#define FOG_LINEAR_CONST (1<<24) +#define FOG_CONST_1(x) ((x)<<4) +#define ENABLE_FOG_DENSITY (1<<23) +/* Dword 2 */ +#define FOG_CONST_2(x) (x) +/* Dword 3 */ +#define FOG_DENSITY(x) (x) + +/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p142 */ +#define _3DSTATE_INDPT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24)) 
+#define ENABLE_INDPT_ALPHA_BLEND ((1<<23)|(1<<22)) +#define DISABLE_INDPT_ALPHA_BLEND (1<<23) +#define ALPHA_BLENDFUNC_MASK 0x3f0000 +#define ENABLE_ALPHA_BLENDFUNC (1<<21) +#define ABLENDFUNC_ADD 0 +#define ABLENDFUNC_SUB (1<<16) +#define ABLENDFUNC_RVSE_SUB (2<<16) +#define ABLENDFUNC_MIN (3<<16) +#define ABLENDFUNC_MAX (4<<16) +#define SRC_DST_ABLEND_MASK 0xfff +#define ENABLE_SRC_ABLEND_FACTOR (1<<11) +#define SRC_ABLEND_FACT(x) ((x)<<6) +#define ENABLE_DST_ABLEND_FACTOR (1<<5) +#define DST_ABLEND_FACT(x) (x) + +#define BLENDFACTOR_ZERO 0x01 +#define BLENDFACTOR_ONE 0x02 +#define BLENDFACTOR_SRC_COLR 0x03 +#define BLENDFACTOR_INV_SRC_COLR 0x04 +#define BLENDFACTOR_SRC_ALPHA 0x05 +#define BLENDFACTOR_INV_SRC_ALPHA 0x06 +#define BLENDFACTOR_DST_ALPHA 0x07 +#define BLENDFACTOR_INV_DST_ALPHA 0x08 +#define BLENDFACTOR_DST_COLR 0x09 +#define BLENDFACTOR_INV_DST_COLR 0x0a +#define BLENDFACTOR_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACTOR_CONST_COLOR 0x0c +#define BLENDFACTOR_INV_CONST_COLOR 0x0d +#define BLENDFACTOR_CONST_ALPHA 0x0e +#define BLENDFACTOR_INV_CONST_ALPHA 0x0f +#define BLENDFACTOR_MASK 0x0f + +/* _3DSTATE_MAP_BLEND_ARG, p152 */ +#define _3DSTATE_MAP_BLEND_ARG_CMD(stage) (CMD_3D|(0x0e<<24)|((stage)<<20)) + +#define TEXPIPE_COLOR 0 +#define TEXPIPE_ALPHA (1<<18) +#define TEXPIPE_KILL (2<<18) +#define TEXBLEND_ARG0 0 +#define TEXBLEND_ARG1 (1<<15) +#define TEXBLEND_ARG2 (2<<15) +#define TEXBLEND_ARG3 (3<<15) +#define TEXBLENDARG_MODIFY_PARMS (1<<6) +#define TEXBLENDARG_REPLICATE_ALPHA (1<<5) +#define TEXBLENDARG_INV_ARG (1<<4) +#define TEXBLENDARG_ONE 0 +#define TEXBLENDARG_FACTOR 0x01 +#define TEXBLENDARG_ACCUM 0x02 +#define TEXBLENDARG_DIFFUSE 0x03 +#define TEXBLENDARG_SPEC 0x04 +#define TEXBLENDARG_CURRENT 0x05 +#define TEXBLENDARG_TEXEL0 0x06 +#define TEXBLENDARG_TEXEL1 0x07 +#define TEXBLENDARG_TEXEL2 0x08 +#define TEXBLENDARG_TEXEL3 0x09 +#define TEXBLENDARG_FACTOR_N 0x0e + +/* _3DSTATE_MAP_BLEND_OP, p155 */ +#define _3DSTATE_MAP_BLEND_OP_CMD(stage) 
(CMD_3D|(0x0d<<24)|((stage)<<20)) +#if 0 +# define TEXPIPE_COLOR 0 +# define TEXPIPE_ALPHA (1<<18) +# define TEXPIPE_KILL (2<<18) +#endif +#define ENABLE_TEXOUTPUT_WRT_SEL (1<<17) +#define TEXOP_OUTPUT_CURRENT 0 +#define TEXOP_OUTPUT_ACCUM (1<<15) +#define ENABLE_TEX_CNTRL_STAGE ((1<<12)|(1<<11)) +#define DISABLE_TEX_CNTRL_STAGE (1<<12) +#define TEXOP_SCALE_SHIFT 9 +#define TEXOP_SCALE_1X (0 << TEXOP_SCALE_SHIFT) +#define TEXOP_SCALE_2X (1 << TEXOP_SCALE_SHIFT) +#define TEXOP_SCALE_4X (2 << TEXOP_SCALE_SHIFT) +#define TEXOP_MODIFY_PARMS (1<<8) +#define TEXOP_LAST_STAGE (1<<7) +#define TEXBLENDOP_KILLPIXEL 0x02 +#define TEXBLENDOP_ARG1 0x01 +#define TEXBLENDOP_ARG2 0x02 +#define TEXBLENDOP_MODULATE 0x03 +#define TEXBLENDOP_ADD 0x06 +#define TEXBLENDOP_ADDSIGNED 0x07 +#define TEXBLENDOP_BLEND 0x08 +#define TEXBLENDOP_BLEND_AND_ADD 0x09 +#define TEXBLENDOP_SUBTRACT 0x0a +#define TEXBLENDOP_DOT3 0x0b +#define TEXBLENDOP_DOT4 0x0c +#define TEXBLENDOP_MODULATE_AND_ADD 0x0d +#define TEXBLENDOP_MODULATE_2X_AND_ADD 0x0e +#define TEXBLENDOP_MODULATE_4X_AND_ADD 0x0f + +/* _3DSTATE_MAP_BUMP_TABLE, p160 TODO */ +/* _3DSTATE_MAP_COLOR_CHROMA_KEY, p161 TODO */ + +#define _3DSTATE_MAP_COORD_TRANSFORM ((3<<29)|(0x1d<<24)|(0x8c<<16)) +#define DISABLE_TEX_TRANSFORM (1<<28) +#define TEXTURE_SET(x) (x<<29) + +#define _3DSTATE_VERTEX_TRANSFORM ((3<<29)|(0x1d<<24)|(0x8b<<16)) +#define DISABLE_VIEWPORT_TRANSFORM (1<<31) +#define DISABLE_PERSPECTIVE_DIVIDE (1<<29) + +/* _3DSTATE_MAP_COORD_SET_BINDINGS, p162 */ +#define _3DSTATE_MAP_COORD_SETBIND_CMD (CMD_3D|(0x1d<<24)|(0x02<<16)) +#define TEXBIND_MASK3 ((1<<15)|(1<<14)|(1<<13)|(1<<12)) +#define TEXBIND_MASK2 ((1<<11)|(1<<10)|(1<<9)|(1<<8)) +#define TEXBIND_MASK1 ((1<<7)|(1<<6)|(1<<5)|(1<<4)) +#define TEXBIND_MASK0 ((1<<3)|(1<<2)|(1<<1)|1) + +#define TEXBIND_SET3(x) ((x)<<12) +#define TEXBIND_SET2(x) ((x)<<8) +#define TEXBIND_SET1(x) ((x)<<4) +#define TEXBIND_SET0(x) (x) + +#define TEXCOORDSRC_KEEP 0 +#define TEXCOORDSRC_DEFAULT 0x01 
+#define TEXCOORDSRC_VTXSET_0 0x08 +#define TEXCOORDSRC_VTXSET_1 0x09 +#define TEXCOORDSRC_VTXSET_2 0x0a +#define TEXCOORDSRC_VTXSET_3 0x0b +#define TEXCOORDSRC_VTXSET_4 0x0c +#define TEXCOORDSRC_VTXSET_5 0x0d +#define TEXCOORDSRC_VTXSET_6 0x0e +#define TEXCOORDSRC_VTXSET_7 0x0f + +#define MAP_UNIT(unit) ((unit)<<16) +#define MAP_UNIT_MASK (0x7<<16) + +/* _3DSTATE_MAP_COORD_SETS, p164 */ +#define _3DSTATE_MAP_COORD_SET_CMD (CMD_3D|(0x1c<<24)|(0x01<<19)) +#define TEXCOORD_SET(n) ((n)<<16) +#define ENABLE_TEXCOORD_PARAMS (1<<15) +#define TEXCOORDS_ARE_NORMAL (1<<14) +#define TEXCOORDS_ARE_IN_TEXELUNITS 0 +#define TEXCOORDTYPE_CARTESIAN 0 +#define TEXCOORDTYPE_HOMOGENEOUS (1<<11) +#define TEXCOORDTYPE_VECTOR (2<<11) +#define TEXCOORDTYPE_MASK (0x7<<11) +#define ENABLE_ADDR_V_CNTL (1<<7) +#define ENABLE_ADDR_U_CNTL (1<<3) +#define TEXCOORD_ADDR_V_MODE(x) ((x)<<4) +#define TEXCOORD_ADDR_U_MODE(x) (x) +#define TEXCOORDMODE_WRAP 0 +#define TEXCOORDMODE_MIRROR 1 +#define TEXCOORDMODE_CLAMP 2 +#define TEXCOORDMODE_WRAP_SHORTEST 3 +#define TEXCOORDMODE_CLAMP_BORDER 4 +#define TEXCOORD_ADDR_V_MASK 0x70 +#define TEXCOORD_ADDR_U_MASK 0x7 + +/* _3DSTATE_MAP_CUBE, p168 TODO */ +#define _3DSTATE_MAP_CUBE (CMD_3D|(0x1c<<24)|(0x0a<<19)) +#define CUBE_NEGX_ENABLE (1<<5) +#define CUBE_POSX_ENABLE (1<<4) +#define CUBE_NEGY_ENABLE (1<<3) +#define CUBE_POSY_ENABLE (1<<2) +#define CUBE_NEGZ_ENABLE (1<<1) +#define CUBE_POSZ_ENABLE (1<<0) + +#define _3DSTATE_MAP_INFO_CMD (CMD_3D|(0x1d<<24)|(0x0<<16)|3) +#define TEXMAP_INDEX(x) ((x)<<28) +#define MAP_SURFACE_8BIT (1<<24) +#define MAP_SURFACE_16BIT (2<<24) +#define MAP_SURFACE_32BIT (3<<24) +#define MAP_FORMAT_2D (0) +#define MAP_FORMAT_3D_CUBE (1<<11) + +/* _3DSTATE_MODES_1, p190 */ +#define _3DSTATE_MODES_1_CMD (CMD_3D|(0x08<<24)) +#define BLENDFUNC_MASK 0x3f0000 +#define ENABLE_COLR_BLND_FUNC (1<<21) +#define BLENDFUNC_ADD 0 +#define BLENDFUNC_SUB (1<<16) +#define BLENDFUNC_RVRSE_SUB (2<<16) +#define BLENDFUNC_MIN (3<<16) +#define 
BLENDFUNC_MAX (4<<16) +#define SRC_DST_BLND_MASK 0xfff +#define ENABLE_SRC_BLND_FACTOR (1<<11) +#define ENABLE_DST_BLND_FACTOR (1<<5) +#define SRC_BLND_FACT(x) ((x)<<6) +#define DST_BLND_FACT(x) (x) + +/* _3DSTATE_MODES_2, p192 */ +#define _3DSTATE_MODES_2_CMD (CMD_3D|(0x0f<<24)) +#define ENABLE_GLOBAL_DEPTH_BIAS (1<<22) +#define GLOBAL_DEPTH_BIAS(x) ((x)<<14) +#define ENABLE_ALPHA_TEST_FUNC (1<<13) +#define ENABLE_ALPHA_REF_VALUE (1<<8) +#define ALPHA_TEST_FUNC(x) ((x)<<9) +#define ALPHA_REF_VALUE(x) (x) + +#define ALPHA_TEST_REF_MASK 0x3fff + +/* _3DSTATE_MODES_3, p193 */ +#define _3DSTATE_MODES_3_CMD (CMD_3D|(0x02<<24)) +#define DEPTH_TEST_FUNC_MASK 0x1f0000 +#define ENABLE_DEPTH_TEST_FUNC (1<<20) +/* Uses COMPAREFUNC */ +#define DEPTH_TEST_FUNC(x) ((x)<<16) +#define ENABLE_ALPHA_SHADE_MODE (1<<11) +#define ENABLE_FOG_SHADE_MODE (1<<9) +#define ENABLE_SPEC_SHADE_MODE (1<<7) +#define ENABLE_COLOR_SHADE_MODE (1<<5) +#define ALPHA_SHADE_MODE(x) ((x)<<10) +#define FOG_SHADE_MODE(x) ((x)<<8) +#define SPEC_SHADE_MODE(x) ((x)<<6) +#define COLOR_SHADE_MODE(x) ((x)<<4) +#define CULLMODE_MASK 0xf +#define ENABLE_CULL_MODE (1<<3) +#define CULLMODE_BOTH 0 +#define CULLMODE_NONE 1 +#define CULLMODE_CW 2 +#define CULLMODE_CCW 3 + +#define SHADE_MODE_LINEAR 0 +#define SHADE_MODE_FLAT 0x1 + +/* _3DSTATE_MODES_4, p195 */ +#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x16<<24)) +#define ENABLE_LOGIC_OP_FUNC (1<<23) +#define LOGIC_OP_FUNC(x) ((x)<<18) +#define LOGICOP_MASK ((1<<18)|(1<<19)|(1<<20)|(1<<21)) +#define LOGICOP_CLEAR 0 +#define LOGICOP_NOR 0x1 +#define LOGICOP_AND_INV 0x2 +#define LOGICOP_COPY_INV 0x3 +#define LOGICOP_AND_RVRSE 0x4 +#define LOGICOP_INV 0x5 +#define LOGICOP_XOR 0x6 +#define LOGICOP_NAND 0x7 +#define LOGICOP_AND 0x8 +#define LOGICOP_EQUIV 0x9 +#define LOGICOP_NOOP 0xa +#define LOGICOP_OR_INV 0xb +#define LOGICOP_COPY 0xc +#define LOGICOP_OR_RVRSE 0xd +#define LOGICOP_OR 0xe +#define LOGICOP_SET 0xf +#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00)) 
+#define ENABLE_STENCIL_TEST_MASK (1<<17) +#define STENCIL_TEST_MASK(x) ((x)<<8) +#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff)) +#define ENABLE_STENCIL_WRITE_MASK (1<<16) +#define STENCIL_WRITE_MASK(x) ((x)&0xff) + +/* _3DSTATE_MODES_5, p196 */ +#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24)) +#define ENABLE_SPRITE_POINT_TEX (1<<23) +#define SPRITE_POINT_TEX_ON (1<<22) +#define SPRITE_POINT_TEX_OFF 0 +#define FLUSH_RENDER_CACHE (1<<18) +#define FLUSH_TEXTURE_CACHE (1<<16) +#define FIXED_LINE_WIDTH_MASK 0xfc00 +#define ENABLE_FIXED_LINE_WIDTH (1<<15) +#define FIXED_LINE_WIDTH(x) ((x)<<10) +#define FIXED_POINT_WIDTH_MASK 0x3ff +#define ENABLE_FIXED_POINT_WIDTH (1<<9) +#define FIXED_POINT_WIDTH(x) (x) + +/* _3DSTATE_RASTERIZATION_RULES, p198 */ +#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24)) +#define ENABLE_POINT_RASTER_RULE (1<<15) +#define OGL_POINT_RASTER_RULE (1<<13) +#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) +#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define ENABLE_TRI_STRIP_PROVOKE_VRTX (1<<2) +#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) +#define TRI_STRIP_PROVOKE_VRTX(x) (x) + +/* _3DSTATE_SCISSOR_ENABLE, p200 */ +#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19)) +#define ENABLE_SCISSOR_RECT ((1<<1) | 1) +#define DISABLE_SCISSOR_RECT (1<<1) + +/* _3DSTATE_SCISSOR_RECTANGLE_0, p201 */ +#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1) +/* Dword 1 */ +#define SCISSOR_RECT_0_YMIN(x) ((x)<<16) +#define SCISSOR_RECT_0_XMIN(x) (x) +/* Dword 2 */ +#define SCISSOR_RECT_0_YMAX(x) ((x)<<16) +#define SCISSOR_RECT_0_XMAX(x) (x) + +/* _3DSTATE_STENCIL_TEST, p202 */ +#define _3DSTATE_STENCIL_TEST_CMD (CMD_3D|(0x09<<24)) +#define ENABLE_STENCIL_PARMS (1<<23) +#define STENCIL_OPS_MASK (0xffc000) +#define STENCIL_FAIL_OP(x) ((x)<<20) +#define STENCIL_PASS_DEPTH_FAIL_OP(x) ((x)<<17) +#define STENCIL_PASS_DEPTH_PASS_OP(x) ((x)<<14) + +#define ENABLE_STENCIL_TEST_FUNC_MASK 
((1<<13)|(1<<12)|(1<<11)|(1<<10)|(1<<9)) +#define ENABLE_STENCIL_TEST_FUNC (1<<13) +/* Uses COMPAREFUNC */ +#define STENCIL_TEST_FUNC(x) ((x)<<9) +#define STENCIL_REF_VALUE_MASK ((1<<8)|0xff) +#define ENABLE_STENCIL_REF_VALUE (1<<8) +#define STENCIL_REF_VALUE(x) (x) + +/* _3DSTATE_VERTEX_FORMAT, p204 */ +#define _3DSTATE_VFT0_CMD (CMD_3D|(0x05<<24)) +#define VFT0_POINT_WIDTH (1<<12) +#define VFT0_TEX_COUNT_MASK (7<<8) +#define VFT0_TEX_COUNT_SHIFT 8 +#define VFT0_TEX_COUNT(x) ((x)<<8) +#define VFT0_SPEC (1<<7) +#define VFT0_DIFFUSE (1<<6) +#define VFT0_DEPTH_OFFSET (1<<5) +#define VFT0_XYZ (1<<1) +#define VFT0_XYZW (2<<1) +#define VFT0_XY (3<<1) +#define VFT0_XYW (4<<1) +#define VFT0_XYZW_MASK (7<<1) + +/* _3DSTATE_VERTEX_FORMAT_2, p206 */ +#define _3DSTATE_VERTEX_FORMAT_2_CMD (CMD_3D|(0x0a<<24)) +#define VFT1_TEX7_FMT(x) ((x)<<14) +#define VFT1_TEX6_FMT(x) ((x)<<12) +#define VFT1_TEX5_FMT(x) ((x)<<10) +#define VFT1_TEX4_FMT(x) ((x)<<8) +#define VFT1_TEX3_FMT(x) ((x)<<6) +#define VFT1_TEX2_FMT(x) ((x)<<4) +#define VFT1_TEX1_FMT(x) ((x)<<2) +#define VFT1_TEX0_FMT(x) (x) +#define VFT1_TEX0_MASK 3 +#define VFT1_TEX1_SHIFT 2 +#define TEXCOORDFMT_2D 0 +#define TEXCOORDFMT_3D 1 +#define TEXCOORDFMT_4D 2 +#define TEXCOORDFMT_1D 3 + +/*New stuff picked up along the way */ + +#define MLC_LOD_BIAS_MASK ((1<<7)-1) + +/* _3DSTATE_VERTEX_TRANSFORM, p207 */ +#define _3DSTATE_VERTEX_TRANS_CMD (CMD_3D|(0x1d<<24)|(0x8b<<16)|0) +#define _3DSTATE_VERTEX_TRANS_MTX_CMD (CMD_3D|(0x1d<<24)|(0x8b<<16)|6) +/* Dword 1 */ +#define ENABLE_VIEWPORT_TRANSFORM ((1<<31)|(1<<30)) +#define DISABLE_VIEWPORT_TRANSFORM (1<<31) +#define ENABLE_PERSP_DIVIDE ((1<<29)|(1<<28)) +#define DISABLE_PERSP_DIVIDE (1<<29) +#define VRTX_TRANS_LOAD_MATRICES 0x7421 +#define VRTX_TRANS_NO_LOAD_MATRICES 0x0000 +/* Dword 2 -> 7 are matrix elements */ + +/* _3DSTATE_W_STATE, p209 */ +#define _3DSTATE_W_STATE_CMD (CMD_3D|(0x1d<<24)|(0x8d<<16)|1) +/* Dword 1 */ +#define MAGIC_W_STATE_DWORD1 0x00000008 +/* Dword 2 */ 
+#define WFAR_VALUE(x) (x) + +/* Stipple command, carried over from the i810, apparently: + */ +#define _3DSTATE_STIPPLE (CMD_3D|(0x1d<<24)|(0x83<<16)) +#define ST1_ENABLE (1<<16) +#define ST1_MASK (0xffff) + +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 (CMD_3D|(0x1d<<24)|(0x04<<16)) +#define I1_LOAD_S(n) (1<<((n)+4)) +#define S3_POINT_WIDTH_SHIFT 23 +#define S3_LINE_WIDTH_SHIFT 19 +#define S3_ALPHA_SHADE_MODE_SHIFT 18 +#define S3_FOG_SHADE_MODE_SHIFT 17 +#define S3_SPEC_SHADE_MODE_SHIFT 16 +#define S3_COLOR_SHADE_MODE_SHIFT 15 +#define S3_CULL_MODE_SHIFT 13 +#define S3_CULLMODE_BOTH (0) +#define S3_CULLMODE_NONE (1<<13) +#define S3_CULLMODE_CW (2<<13) +#define S3_CULLMODE_CCW (3<<13) +#define S3_POINT_WIDTH_PRESENT (1<<12) +#define S3_SPEC_FOG_PRESENT (1<<11) +#define S3_DIFFUSE_PRESENT (1<<10) +#define S3_DEPTH_OFFSET_PRESENT (1<<9) +#define S3_POSITION_SHIFT 6 +#define S3_VERTEXHAS_XYZ (1<<6) +#define S3_VERTEXHAS_XYZW (2<<6) +#define S3_VERTEXHAS_XY (3<<6) +#define S3_VERTEXHAS_XYW (4<<6) +#define S3_ENABLE_SPEC_ADD (1<<5) +#define S3_ENABLE_FOG (1<<4) +#define S3_ENABLE_LOCAL_DEPTH_BIAS (1<<3) +#define S3_ENABLE_SPRITE_POINT (1<<1) +#define S3_ENABLE_ANTIALIASING 1 +#define S8_ENABLE_ALPHA_TEST (1<<31) +#define S8_ALPHA_TEST_FUNC_SHIFT 28 +#define S8_ALPHA_REFVALUE_SHIFT 20 +#define S8_ENABLE_DEPTH_TEST (1<<19) +#define S8_DEPTH_TEST_FUNC_SHIFT 16 +#define S8_ENABLE_COLOR_BLEND (1<<15) +#define S8_COLOR_BLEND_FUNC_SHIFT 12 +#define S8_BLENDFUNC_ADD (0) +#define S8_BLENDFUNC_SUB (1<<12) +#define S8_BLENDFUNC_RVRSE_SUB (2<<12) +#define S8_BLENDFUNC_MIN (3<<12) +#define S8_BLENDFUNC_MAX (4<<12) +#define S8_SRC_BLEND_FACTOR_SHIFT 8 +#define S8_DST_BLEND_FACTOR_SHIFT 4 +#define S8_ENABLE_DEPTH_BUFFER_WRITE (1<<3) +#define S8_ENABLE_COLOR_BUFFER_WRITE (1<<2) + +#define _3DSTATE_LOAD_STATE_IMMEDIATE_2 (CMD_3D|(0x1d<<24)|(0x03<<16)) +#define LOAD_TEXTURE_MAP(x) (1<<((x)+11)) +#define LOAD_TEXTURE_BLEND_STAGE(x) (1<<((x)+7)) +#define LOAD_GLOBAL_COLOR_FACTOR (1<<6) + 
+#define TM0S0_ADDRESS_MASK 0xfffffffc +#define TM0S0_USE_FENCE (1<<1) + +#define TM0S1_HEIGHT_SHIFT 21 +#define TM0S1_WIDTH_SHIFT 10 +#define TM0S1_PALETTE_SELECT (1<<9) +#define TM0S1_MAPSURF_FORMAT_MASK (0x7 << 6) +#define TM0S1_MAPSURF_FORMAT_SHIFT 6 +#define MAPSURF_8BIT_INDEXED (0<<6) +#define MAPSURF_8BIT (1<<6) +#define MAPSURF_16BIT (2<<6) +#define MAPSURF_32BIT (3<<6) +#define MAPSURF_411 (4<<6) +#define MAPSURF_422 (5<<6) +#define MAPSURF_COMPRESSED (6<<6) +#define MAPSURF_4BIT_INDEXED (7<<6) +#define TM0S1_MT_FORMAT_MASK (0x7 << 3) +#define TM0S1_MT_FORMAT_SHIFT 3 +#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ +#define MT_8BIT_IDX_RGB565 (0<<3) /* SURFACE_8BIT_INDEXED */ +#define MT_8BIT_IDX_ARGB1555 (1<<3) +#define MT_8BIT_IDX_ARGB4444 (2<<3) +#define MT_8BIT_IDX_AY88 (3<<3) +#define MT_8BIT_IDX_ABGR8888 (4<<3) +#define MT_8BIT_IDX_BUMP_88DVDU (5<<3) +#define MT_8BIT_IDX_BUMP_655LDVDU (6<<3) +#define MT_8BIT_IDX_ARGB8888 (7<<3) +#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ +#define MT_8BIT_L8 (1<<3) +#define MT_8BIT_A8 (4<<3) +#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ +#define MT_16BIT_ARGB1555 (1<<3) +#define MT_16BIT_ARGB4444 (2<<3) +#define MT_16BIT_AY88 (3<<3) +#define MT_16BIT_DIB_ARGB1555_8888 (4<<3) +#define MT_16BIT_BUMP_88DVDU (5<<3) +#define MT_16BIT_BUMP_655LDVDU (6<<3) +#define MT_16BIT_DIB_RGB565_8888 (7<<3) +#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ +#define MT_32BIT_ABGR8888 (1<<3) +#define MT_32BIT_XRGB8888 (2<<3) +#define MT_32BIT_XBGR8888 (3<<3) +#define MT_32BIT_BUMP_XLDVDU_8888 (6<<3) +#define MT_32BIT_DIB_8888 (7<<3) +#define MT_411_YUV411 (0<<3) /* SURFACE_411 */ +#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ +#define MT_422_YCRCB_NORMAL (1<<3) +#define MT_422_YCRCB_SWAPUV (2<<3) +#define MT_422_YCRCB_SWAPUVY (3<<3) +#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ +#define MT_COMPRESS_DXT2_3 (1<<3) +#define MT_COMPRESS_DXT4_5 (2<<3) +#define MT_COMPRESS_FXT1 (3<<3) +#define 
TM0S1_COLORSPACE_CONVERSION (1 << 2) +#define TM0S1_TILED_SURFACE (1 << 1) +#define TM0S1_TILE_WALK (1 << 0) + +#define TM0S2_PITCH_SHIFT 21 +#define TM0S2_CUBE_FACE_ENA_SHIFT 15 +#define TM0S2_CUBE_FACE_ENA_MASK (1<<15) +#define TM0S2_MAP_FORMAT (1<<14) +#define TM0S2_MAP_2D (0<<14) +#define TM0S2_MAP_3D_CUBE (1<<14) +#define TM0S2_VERTICAL_LINE_STRIDE (1<<13) +#define TM0S2_VERITCAL_LINE_STRIDE_OFF (1<<12) +#define TM0S2_OUTPUT_CHAN_SHIFT 10 +#define TM0S2_OUTPUT_CHAN_MASK (3<<10) + +#define TM0S3_MIP_FILTER_MASK (0x3<<30) +#define TM0S3_MIP_FILTER_SHIFT 30 +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 +#define TM0S3_MAG_FILTER_MASK (0x3<<28) +#define TM0S3_MAG_FILTER_SHIFT 28 +#define TM0S3_MIN_FILTER_MASK (0x3<<26) +#define TM0S3_MIN_FILTER_SHIFT 26 +#define FILTER_NEAREST 0 +#define FILTER_LINEAR 1 +#define FILTER_ANISOTROPIC 2 + +#define TM0S3_LOD_BIAS_SHIFT 17 +#define TM0S3_LOD_BIAS_MASK (0x1ff<<17) +#define TM0S3_MAX_MIP_SHIFT 9 +#define TM0S3_MAX_MIP_MASK (0xff<<9) +#define TM0S3_MIN_MIP_SHIFT 3 +#define TM0S3_MIN_MIP_MASK (0x3f<<3) +#define TM0S3_KILL_PIXEL (1<<2) +#define TM0S3_KEYED_FILTER (1<<1) +#define TM0S3_CHROMA_KEY (1<<0) + +/* _3DSTATE_MAP_TEXEL_STREAM, p188 */ +#define _3DSTATE_MAP_TEX_STREAM_CMD (CMD_3D|(0x1c<<24)|(0x05<<19)) +#define DISABLE_TEX_STREAM_BUMP (1<<12) +#define ENABLE_TEX_STREAM_BUMP ((1<<12)|(1<<11)) +#define TEX_MODIFY_UNIT_0 0 +#define TEX_MODIFY_UNIT_1 (1<<8) +#define ENABLE_TEX_STREAM_COORD_SET (1<<7) +#define TEX_STREAM_COORD_SET(x) ((x)<<4) +#define ENABLE_TEX_STREAM_MAP_IDX (1<<3) +#define TEX_STREAM_MAP_IDX(x) (x) + +#define FLUSH_MAP_CACHE (1<<0) + +#define _3DSTATE_MAP_FILTER_CMD (CMD_3D|(0x1c<<24)|(0x02<<19)) +#define FILTER_TEXMAP_INDEX(x) ((x) << 16) +#define MAG_MODE_FILTER_ENABLE (1 << 5) +#define MIN_MODE_FILTER_ENABLE (1 << 2) +#define MAG_MAPFILTER_NEAREST (0 << 3) +#define MAG_MAPFILTER_LINEAR (1 << 3) +#define MAG_MAPFILTER_ANISOTROPIC (2 << 3) +#define 
MIN_MAPFILTER_NEAREST (0) +#define MIN_MAPFILTER_LINEAR (1) +#define MIN_MAPFILTER_ANISOTROPIC (2) +#define ENABLE_KEYS (1<<15) +#define DISABLE_COLOR_KEY 0 +#define DISABLE_CHROMA_KEY 0 +#define DISABLE_KILL_PIXEL 0 +#define ENABLE_MIP_MODE_FILTER (1 << 9) +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 + +#define TB0C_LAST_STAGE (1 << 31) +#define TB0C_RESULT_SCALE_1X (0 << 29) +#define TB0C_RESULT_SCALE_2X (1 << 29) +#define TB0C_RESULT_SCALE_4X (2 << 29) +#define TB0C_OP_MODULE (3 << 25) +#define TB0C_OUTPUT_WRITE_CURRENT (0 << 24) +#define TB0C_OUTPUT_WRITE_ACCUM (1 << 24) +#define TB0C_ARG3_REPLICATE_ALPHA (1<<23) +#define TB0C_ARG3_INVERT (1<<22) +#define TB0C_ARG3_SEL_XXX +#define TB0C_ARG2_REPLICATE_ALPHA (1<<17) +#define TB0C_ARG2_INVERT (1<<16) +#define TB0C_ARG2_SEL_ONE (0 << 12) +#define TB0C_ARG2_SEL_FACTOR (1 << 12) +#define TB0C_ARG2_SEL_TEXEL0 (6 << 12) +#define TB0C_ARG2_SEL_TEXEL1 (7 << 12) +#define TB0C_ARG2_SEL_TEXEL2 (8 << 12) +#define TB0C_ARG2_SEL_TEXEL3 (9 << 12) +#define TB0C_ARG1_REPLICATE_ALPHA (1<<11) +#define TB0C_ARG1_INVERT (1<<10) +#define TB0C_ARG1_SEL_ONE (0 << 6) +#define TB0C_ARG1_SEL_TEXEL0 (6 << 6) +#define TB0C_ARG1_SEL_TEXEL1 (7 << 6) +#define TB0C_ARG1_SEL_TEXEL2 (8 << 6) +#define TB0C_ARG1_SEL_TEXEL3 (9 << 6) +#define TB0C_ARG0_REPLICATE_ALPHA (1<<5) +#define TB0C_ARG0_SEL_XXX + +#define TB0A_CTR_STAGE_ENABLE (1<<31) +#define TB0A_RESULT_SCALE_1X (0 << 29) +#define TB0A_RESULT_SCALE_2X (1 << 29) +#define TB0A_RESULT_SCALE_4X (2 << 29) +#define TB0A_OP_MODULE (3 << 25) +#define TB0A_OUTPUT_WRITE_CURRENT (0<<24) +#define TB0A_OUTPUT_WRITE_ACCUM (1<<24) +#define TB0A_CTR_STAGE_SEL_BITS_XXX +#define TB0A_ARG3_SEL_XXX +#define TB0A_ARG3_INVERT (1<<17) +#define TB0A_ARG2_INVERT (1<<16) +#define TB0A_ARG2_SEL_ONE (0 << 12) +#define TB0A_ARG2_SEL_TEXEL0 (6 << 12) +#define TB0A_ARG2_SEL_TEXEL1 (7 << 12) +#define TB0A_ARG2_SEL_TEXEL2 (8 << 12) +#define TB0A_ARG2_SEL_TEXEL3 (9 << 12) +#define 
TB0A_ARG1_INVERT (1<<10) +#define TB0A_ARG1_SEL_ONE (0 << 6) +#define TB0A_ARG1_SEL_TEXEL0 (6 << 6) +#define TB0A_ARG1_SEL_TEXEL1 (7 << 6) +#define TB0A_ARG1_SEL_TEXEL2 (8 << 6) +#define TB0A_ARG1_SEL_TEXEL3 (9 << 6) + +#endif /* GEN2_RENDER_H */ diff --git a/src/sna/gen3_render.c b/src/sna/gen3_render.c new file mode 100644 index 00000000..203de08f --- /dev/null +++ b/src/sna/gen3_render.c @@ -0,0 +1,3694 @@ +/* + * Copyright © 2010-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_render.h" +#include "sna_render_inline.h" +#include "sna_reg.h" +#include "sna_video.h" + +#include "gen3_render.h" + +#if DEBUG_RENDER +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +#define NO_COMPOSITE 0 +#define NO_COMPOSITE_SPANS 0 +#define NO_COPY 0 +#define NO_COPY_BOXES 0 +#define NO_FILL 0 +#define NO_FILL_BOXES 0 + +enum { + SHADER_NONE = 0, + SHADER_ZERO, + SHADER_CONSTANT, + SHADER_LINEAR, + SHADER_RADIAL, + SHADER_TEXTURE, + SHADER_OPACITY, +}; + +#define OUT_BATCH(v) batch_emit(sna, v) +#define OUT_BATCH_F(v) batch_emit_float(sna, v) +#define OUT_VERTEX(v) vertex_emit(sna, v) + +enum gen3_radial_mode { + RADIAL_ONE, + RADIAL_TWO +}; + +static const struct blendinfo { + Bool dst_alpha; + Bool src_alpha; + uint32_t src_blend; + uint32_t dst_blend; +} gen3_blend_op[] = { + /* Clear */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ZERO}, + /* Src */ {0, 0, BLENDFACT_ONE, BLENDFACT_ZERO}, + /* Dst */ {0, 0, BLENDFACT_ZERO, BLENDFACT_ONE}, + /* Over */ {0, 1, BLENDFACT_ONE, BLENDFACT_INV_SRC_ALPHA}, + /* OverReverse */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ONE}, + /* In */ {1, 0, BLENDFACT_DST_ALPHA, BLENDFACT_ZERO}, + /* InReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_SRC_ALPHA}, + /* Out */ {1, 0, BLENDFACT_INV_DST_ALPHA, BLENDFACT_ZERO}, + /* OutReverse */ {0, 1, BLENDFACT_ZERO, BLENDFACT_INV_SRC_ALPHA}, + /* Atop */ {1, 1, BLENDFACT_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA}, + /* AtopReverse */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_SRC_ALPHA}, + /* Xor */ {1, 1, BLENDFACT_INV_DST_ALPHA, BLENDFACT_INV_SRC_ALPHA}, + /* Add */ {0, 0, BLENDFACT_ONE, BLENDFACT_ONE}, +}; + +static const struct formatinfo { + int fmt, xfmt; + uint32_t card_fmt; + Bool rb_reversed; +} gen3_tex_formats[] = { + {PICT_a8, 0, MAPSURF_8BIT | MT_8BIT_A8, FALSE}, + {PICT_a8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_ARGB8888, 
FALSE}, + {PICT_x8r8g8b8, 0, MAPSURF_32BIT | MT_32BIT_XRGB8888, FALSE}, + {PICT_a8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_ABGR8888, FALSE}, + {PICT_x8b8g8r8, 0, MAPSURF_32BIT | MT_32BIT_XBGR8888, FALSE}, + {PICT_a2r10g10b10, PICT_x2r10g10b10, MAPSURF_32BIT | MT_32BIT_ARGB2101010, FALSE}, + {PICT_a2b10g10r10, PICT_x2b10g10r10, MAPSURF_32BIT | MT_32BIT_ABGR2101010, FALSE}, + {PICT_r5g6b5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, FALSE}, + {PICT_b5g6r5, 0, MAPSURF_16BIT | MT_16BIT_RGB565, TRUE}, + {PICT_a1r5g5b5, PICT_x1r5g5b5, MAPSURF_16BIT | MT_16BIT_ARGB1555, FALSE}, + {PICT_a1b5g5r5, PICT_x1b5g5r5, MAPSURF_16BIT | MT_16BIT_ARGB1555, TRUE}, + {PICT_a4r4g4b4, PICT_x4r4g4b4, MAPSURF_16BIT | MT_16BIT_ARGB4444, FALSE}, + {PICT_a4b4g4r4, PICT_x4b4g4r4, MAPSURF_16BIT | MT_16BIT_ARGB4444, TRUE}, +}; + +#define xFixedToDouble(f) pixman_fixed_to_double(f) + +static inline uint32_t gen3_buf_tiling(uint32_t tiling) +{ + uint32_t v = 0; + switch (tiling) { + case I915_TILING_Y: v |= BUF_3D_TILE_WALK_Y; + case I915_TILING_X: v |= BUF_3D_TILED_SURFACE; + case I915_TILING_NONE: break; + } + return v; +} + +static inline Bool +gen3_check_pitch_3d(struct kgem_bo *bo) +{ + return bo->pitch <= 8192; +} + +static uint32_t gen3_get_blend_cntl(int op, + Bool has_component_alpha, + uint32_t dst_format) +{ + uint32_t sblend = gen3_blend_op[op].src_blend; + uint32_t dblend = gen3_blend_op[op].dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that we'll + * treat it as always 1. + */ + if (gen3_blend_op[op].dst_alpha) { + if (PICT_FORMAT_A(dst_format) == 0) { + if (sblend == BLENDFACT_DST_ALPHA) + sblend = BLENDFACT_ONE; + else if (sblend == BLENDFACT_INV_DST_ALPHA) + sblend = BLENDFACT_ZERO; + } + + /* gen3 engine reads 8bit color buffer into green channel + * in cases like color buffer blending etc., and also writes + * back green channel. So with dst_alpha blend we should use + * color factor. See spec on "8-bit rendering". 
+ */ + if (dst_format == PICT_a8) { + if (sblend == BLENDFACT_DST_ALPHA) + sblend = BLENDFACT_DST_COLR; + else if (sblend == BLENDFACT_INV_DST_ALPHA) + sblend = BLENDFACT_INV_DST_COLR; + } + } + + /* If the source alpha is being used, then we should only be in a case + * where the source blend factor is 0, and the source blend value is the + * mask channels multiplied by the source picture's alpha. + */ + if (has_component_alpha && gen3_blend_op[op].src_alpha) { + if (dblend == BLENDFACT_SRC_ALPHA) + dblend = BLENDFACT_SRC_COLR; + else if (dblend == BLENDFACT_INV_SRC_ALPHA) + dblend = BLENDFACT_INV_SRC_COLR; + } + + return (S6_CBUF_BLEND_ENABLE | S6_COLOR_WRITE_ENABLE | + BLENDFUNC_ADD << S6_CBUF_BLEND_FUNC_SHIFT | + sblend << S6_CBUF_SRC_BLEND_FACT_SHIFT | + dblend << S6_CBUF_DST_BLEND_FACT_SHIFT); +} + +static Bool gen3_check_dst_format(uint32_t format) +{ + switch (format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + case PICT_r5g6b5: + case PICT_b5g6r5: + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + case PICT_a1b5g5r5: + case PICT_x1b5g5r5: + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + case PICT_a2b10g10r10: + case PICT_x2b10g10r10: + case PICT_a8: + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + case PICT_a4b4g4r4: + case PICT_x4b4g4r4: + return TRUE; + default: + return FALSE; + } +} + +static Bool gen3_dst_rb_reversed(uint32_t format) +{ + switch (format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case PICT_r5g6b5: + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + case PICT_a8: + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return FALSE; + default: + return TRUE; + } +} + +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) + +static uint32_t gen3_get_dst_format(uint32_t format) +{ +#define BIAS (DSTORG_HORT_BIAS(0x8) | DSTORG_VERT_BIAS(0x8)) + switch (format) { + default: + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case 
PICT_a8b8g8r8: + case PICT_x8b8g8r8: + return BIAS | COLR_BUF_ARGB8888; + case PICT_r5g6b5: + case PICT_b5g6r5: + return BIAS | COLR_BUF_RGB565; + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + case PICT_a1b5g5r5: + case PICT_x1b5g5r5: + return BIAS | COLR_BUF_ARGB1555; + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + case PICT_a2b10g10r10: + case PICT_x2b10g10r10: + return BIAS | COLR_BUF_ARGB2AAA; + case PICT_a8: + return BIAS | COLR_BUF_8BIT; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + case PICT_a4b4g4r4: + case PICT_x4b4g4r4: + return BIAS | COLR_BUF_ARGB4444; + } +#undef BIAS +} + +static uint32_t gen3_texture_repeat(uint32_t repeat) +{ +#define REPEAT(x) \ + (SS3_NORMALIZED_COORDS | \ + TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \ + TEXCOORDMODE_##x << SS3_TCY_ADDR_MODE_SHIFT) + switch (repeat) { + default: + case RepeatNone: + return REPEAT(CLAMP_BORDER); + case RepeatNormal: + return REPEAT(WRAP); + case RepeatPad: + return REPEAT(CLAMP_EDGE); + case RepeatReflect: + return REPEAT(MIRROR); + } +#undef REPEAT +} + +static uint32_t gen3_gradient_repeat(uint32_t repeat) +{ +#define REPEAT(x) \ + (SS3_NORMALIZED_COORDS | \ + TEXCOORDMODE_##x << SS3_TCX_ADDR_MODE_SHIFT | \ + TEXCOORDMODE_WRAP << SS3_TCY_ADDR_MODE_SHIFT) + switch (repeat) { + default: + case RepeatNone: + return REPEAT(CLAMP_BORDER); + case RepeatNormal: + return REPEAT(WRAP); + case RepeatPad: + return REPEAT(CLAMP_EDGE); + case RepeatReflect: + return REPEAT(MIRROR); + } +#undef REPEAT +} + +static Bool gen3_check_repeat(uint32_t repeat) +{ + switch (repeat) { + case RepeatNone: + case RepeatNormal: + case RepeatPad: + case RepeatReflect: + return TRUE; + default: + return FALSE; + } +} + +static uint32_t gen3_filter(uint32_t filter) +{ + switch (filter) { + default: + assert(0); + case PictFilterNearest: + return (FILTER_NEAREST << SS2_MAG_FILTER_SHIFT | + FILTER_NEAREST << SS2_MIN_FILTER_SHIFT | + MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT); + case PictFilterBilinear: + return (FILTER_LINEAR 
<< SS2_MAG_FILTER_SHIFT | + FILTER_LINEAR << SS2_MIN_FILTER_SHIFT | + MIPFILTER_NONE << SS2_MIP_FILTER_SHIFT); + } +} + +static bool gen3_check_filter(uint32_t filter) +{ + switch (filter) { + case PictFilterNearest: + case PictFilterBilinear: + return TRUE; + default: + return FALSE; + } +} + +static inline void +gen3_emit_composite_dstcoord(struct sna *sna, int16_t dstX, int16_t dstY) +{ + OUT_VERTEX(dstX); + OUT_VERTEX(dstY); +} + +fastcall static void +gen3_emit_composite_primitive_constant(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + int16_t dst_x = r->dst.x + op->dst.x; + int16_t dst_y = r->dst.y + op->dst.y; + + gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); + gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); + gen3_emit_composite_dstcoord(sna, dst_x, dst_y); +} + +fastcall static void +gen3_emit_composite_primitive_identity_gradient(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + int16_t dst_x, dst_y; + int16_t src_x, src_y; + + dst_x = r->dst.x + op->dst.x; + dst_y = r->dst.y + op->dst.y; + src_x = r->src.x + op->src.offset[0]; + src_y = r->src.y + op->src.offset[1]; + + gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); + OUT_VERTEX(src_x + r->width); + OUT_VERTEX(src_y + r->height); + + gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); + OUT_VERTEX(src_x); + OUT_VERTEX(src_y + r->height); + + gen3_emit_composite_dstcoord(sna, dst_x, dst_y); + OUT_VERTEX(src_x); + OUT_VERTEX(src_y); +} + +fastcall static void +gen3_emit_composite_primitive_affine_gradient(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + PictTransform *transform = op->src.transform; + int16_t dst_x, dst_y; + int16_t src_x, src_y; + float sx, sy; + + dst_x = r->dst.x + op->dst.x; + dst_y = r->dst.y + op->dst.y; + src_x = r->src.x + op->src.offset[0]; + src_y = 
r->src.y + op->src.offset[1]; + + sna_get_transformed_coordinates(src_x + r->width, src_y + r->height, + transform, + &sx, &sy); + gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); + OUT_VERTEX(sx); + OUT_VERTEX(sy); + + sna_get_transformed_coordinates(src_x, src_y + r->height, + transform, + &sx, &sy); + gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); + OUT_VERTEX(sx); + OUT_VERTEX(sy); + + sna_get_transformed_coordinates(src_x, src_y, + transform, + &sx, &sy); + gen3_emit_composite_dstcoord(sna, dst_x, dst_y); + OUT_VERTEX(sx); + OUT_VERTEX(sy); +} + +fastcall static void +gen3_emit_composite_primitive_identity_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float w = r->width; + float h = r->height; + float *v; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 12; + + v[8] = v[4] = r->dst.x + op->dst.x; + v[0] = v[4] + w; + + v[9] = r->dst.y + op->dst.y; + v[5] = v[1] = v[9] + h; + + v[10] = v[6] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; + v[2] = v[6] + w * op->src.scale[0]; + + v[11] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; + v[7] = v[3] = v[11] + h * op->src.scale[1]; +} + +fastcall static void +gen3_emit_composite_primitive_affine_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + PictTransform *transform = op->src.transform; + int16_t dst_x = r->dst.x + op->dst.x; + int16_t dst_y = r->dst.y + op->dst.y; + int src_x = r->src.x + (int)op->src.offset[0]; + int src_y = r->src.y + (int)op->src.offset[1]; + float sx, sy; + + _sna_get_transformed_coordinates(src_x + r->width, src_y + r->height, + transform, + &sx, &sy); + + gen3_emit_composite_dstcoord(sna, dst_x + r->width, dst_y + r->height); + OUT_VERTEX(sx * op->src.scale[0]); + OUT_VERTEX(sy * op->src.scale[1]); + + _sna_get_transformed_coordinates(src_x, src_y + r->height, + transform, + &sx, 
&sy); + gen3_emit_composite_dstcoord(sna, dst_x, dst_y + r->height); + OUT_VERTEX(sx * op->src.scale[0]); + OUT_VERTEX(sy * op->src.scale[1]); + + _sna_get_transformed_coordinates(src_x, src_y, + transform, + &sx, &sy); + gen3_emit_composite_dstcoord(sna, dst_x, dst_y); + OUT_VERTEX(sx * op->src.scale[0]); + OUT_VERTEX(sy * op->src.scale[1]); +} + +fastcall static void +gen3_emit_composite_primitive_constant_identity_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float w = r->width; + float h = r->height; + float *v; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 12; + + v[8] = v[4] = r->dst.x + op->dst.x; + v[0] = v[4] + w; + + v[9] = r->dst.y + op->dst.y; + v[5] = v[1] = v[9] + h; + + v[10] = v[6] = (r->mask.x + op->mask.offset[0]) * op->mask.scale[0]; + v[2] = v[6] + w * op->mask.scale[0]; + + v[11] = (r->mask.y + op->mask.offset[1]) * op->mask.scale[1]; + v[7] = v[3] = v[11] + h * op->mask.scale[1]; +} + +fastcall static void +gen3_emit_composite_primitive_identity_source_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float dst_x, dst_y; + float src_x, src_y; + float msk_x, msk_y; + float w, h; + float *v; + + dst_x = r->dst.x + op->dst.x; + dst_y = r->dst.y + op->dst.y; + src_x = r->src.x + op->src.offset[0]; + src_y = r->src.y + op->src.offset[1]; + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 18; + + v[0] = dst_x + w; + v[1] = dst_y + h; + v[2] = (src_x + w) * op->src.scale[0]; + v[3] = (src_y + h) * op->src.scale[1]; + v[4] = (msk_x + w) * op->mask.scale[0]; + v[5] = (msk_y + h) * op->mask.scale[1]; + + v[6] = dst_x; + v[7] = v[1]; + v[8] = src_x * op->src.scale[0]; + v[9] = v[3]; + v[10] = msk_x * op->mask.scale[0]; + v[11] =v[5]; + + v[12] 
= v[6]; + v[13] = dst_y; + v[14] = v[8]; + v[15] = src_y * op->src.scale[1]; + v[16] = v[10]; + v[17] = msk_y * op->mask.scale[1]; +} + +fastcall static void +gen3_emit_composite_primitive_affine_source_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + int16_t src_x, src_y; + float dst_x, dst_y; + float msk_x, msk_y; + float w, h; + float *v; + + dst_x = r->dst.x + op->dst.x; + dst_y = r->dst.y + op->dst.y; + src_x = r->src.x + op->src.offset[0]; + src_y = r->src.y + op->src.offset[1]; + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 18; + + v[0] = dst_x + w; + v[1] = dst_y + h; + sna_get_transformed_coordinates(src_x + r->width, src_y + r->height, + op->src.transform, + &v[2], &v[3]); + v[2] *= op->src.scale[0]; + v[3] *= op->src.scale[1]; + v[4] = (msk_x + w) * op->mask.scale[0]; + v[5] = (msk_y + h) * op->mask.scale[1]; + + v[6] = dst_x; + v[7] = v[1]; + sna_get_transformed_coordinates(src_x, src_y + r->height, + op->src.transform, + &v[8], &v[9]); + v[8] *= op->src.scale[0]; + v[9] *= op->src.scale[1]; + v[10] = msk_x * op->mask.scale[0]; + v[11] =v[5]; + + v[12] = v[6]; + v[13] = dst_y; + sna_get_transformed_coordinates(src_x, src_y, + op->src.transform, + &v[14], &v[15]); + v[14] *= op->src.scale[0]; + v[15] *= op->src.scale[1]; + v[16] = v[10]; + v[17] = msk_y * op->mask.scale[1]; +} + +static void +gen3_emit_composite_texcoord(struct sna *sna, + const struct sna_composite_channel *channel, + int16_t x, int16_t y) +{ + float s = 0, t = 0, w = 1; + + switch (channel->gen3.type) { + case SHADER_OPACITY: + case SHADER_NONE: + case SHADER_ZERO: + case SHADER_CONSTANT: + break; + + case SHADER_LINEAR: + case SHADER_RADIAL: + case SHADER_TEXTURE: + x += channel->offset[0]; + y += channel->offset[1]; + if (channel->is_affine) { + 
sna_get_transformed_coordinates(x, y, + channel->transform, + &s, &t); + OUT_VERTEX(s * channel->scale[0]); + OUT_VERTEX(t * channel->scale[1]); + } else { + sna_get_transformed_coordinates_3d(x, y, + channel->transform, + &s, &t, &w); + OUT_VERTEX(s * channel->scale[0]); + OUT_VERTEX(t * channel->scale[1]); + OUT_VERTEX(0); + OUT_VERTEX(w); + } + break; + } +} + +static void +gen3_emit_composite_vertex(struct sna *sna, + const struct sna_composite_op *op, + int16_t srcX, int16_t srcY, + int16_t maskX, int16_t maskY, + int16_t dstX, int16_t dstY) +{ + gen3_emit_composite_dstcoord(sna, dstX, dstY); + gen3_emit_composite_texcoord(sna, &op->src, srcX, srcY); + gen3_emit_composite_texcoord(sna, &op->mask, maskX, maskY); +} + +fastcall static void +gen3_emit_composite_primitive(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + gen3_emit_composite_vertex(sna, op, + r->src.x + r->width, + r->src.y + r->height, + r->mask.x + r->width, + r->mask.y + r->height, + op->dst.x + r->dst.x + r->width, + op->dst.y + r->dst.y + r->height); + gen3_emit_composite_vertex(sna, op, + r->src.x, + r->src.y + r->height, + r->mask.x, + r->mask.y + r->height, + op->dst.x + r->dst.x, + op->dst.y + r->dst.y + r->height); + gen3_emit_composite_vertex(sna, op, + r->src.x, + r->src.y, + r->mask.x, + r->mask.y, + op->dst.x + r->dst.x, + op->dst.y + r->dst.y); +} + +static inline void +gen3_2d_perspective(struct sna *sna, int in, int out) +{ + gen3_fs_rcp(out, 0, gen3_fs_operand(in, W, W, W, W)); + gen3_fs_mul(out, + gen3_fs_operand(in, X, Y, ZERO, ONE), + gen3_fs_operand_reg(out)); +} + +static inline void +gen3_linear_coord(struct sna *sna, + const struct sna_composite_channel *channel, + int in, int out) +{ + int c = channel->gen3.constants; + + if (!channel->is_affine) { + gen3_2d_perspective(sna, in, FS_U0); + in = FS_U0; + } + + gen3_fs_mov(out, gen3_fs_operand_zero()); + gen3_fs_dp3(out, MASK_X, + gen3_fs_operand(in, X, Y, ONE, ZERO), + 
gen3_fs_operand_reg(c)); +} + +static void +gen3_radial_coord(struct sna *sna, + const struct sna_composite_channel *channel, + int in, int out) +{ + int c = channel->gen3.constants; + + if (!channel->is_affine) { + gen3_2d_perspective(sna, in, FS_U0); + in = FS_U0; + } + + switch (channel->gen3.mode) { + case RADIAL_ONE: + /* + pdx = (x - c1x) / dr, pdy = (y - c1y) / dr; + r² = pdx*pdx + pdy*pdy + t = r²/sqrt(r²) - r1/dr; + */ + gen3_fs_mad(FS_U0, MASK_X | MASK_Y, + gen3_fs_operand(in, X, Y, ZERO, ZERO), + gen3_fs_operand(c, Z, Z, ZERO, ZERO), + gen3_fs_operand(c, NEG_X, NEG_Y, ZERO, ZERO)); + gen3_fs_dp2add(FS_U0, MASK_X, + gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO), + gen3_fs_operand(FS_U0, X, Y, ZERO, ZERO), + gen3_fs_operand_zero()); + gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U0, X, X, X, X)); + gen3_fs_mad(out, 0, + gen3_fs_operand(FS_U0, X, ZERO, ZERO, ZERO), + gen3_fs_operand(out, X, ZERO, ZERO, ZERO), + gen3_fs_operand(c, W, ZERO, ZERO, ZERO)); + break; + + case RADIAL_TWO: + /* + pdx = x - c1x, pdy = y - c1y; + A = dx² + dy² - dr² + B = -2*(pdx*dx + pdy*dy + r1*dr); + C = pdx² + pdy² - r1²; + det = B*B - 4*A*C; + t = (-B + sqrt (det)) / (2 * A) + */ + + /* u0.x = pdx, u0.y = pdy, u[0].z = r1; */ + gen3_fs_add(FS_U0, + gen3_fs_operand(in, X, Y, ZERO, ZERO), + gen3_fs_operand(c, X, Y, Z, ZERO)); + /* u0.x = pdx, u0.y = pdy, u[0].z = r1, u[0].w = B; */ + gen3_fs_dp3(FS_U0, MASK_W, + gen3_fs_operand(FS_U0, X, Y, ONE, ZERO), + gen3_fs_operand(c+1, X, Y, Z, ZERO)); + /* u1.x = pdx² + pdy² - r1²; [C] */ + gen3_fs_dp3(FS_U1, MASK_X, + gen3_fs_operand(FS_U0, X, Y, Z, ZERO), + gen3_fs_operand(FS_U0, X, Y, NEG_Z, ZERO)); + /* u1.x = C, u1.y = B, u1.z=-4*A; */ + gen3_fs_mov_masked(FS_U1, MASK_Y, gen3_fs_operand(FS_U0, W, W, W, W)); + gen3_fs_mov_masked(FS_U1, MASK_Z, gen3_fs_operand(c, W, W, W, W)); + /* u1.x = B² - 4*A*C */ + gen3_fs_dp2add(FS_U1, MASK_X, + gen3_fs_operand(FS_U1, X, Y, ZERO, ZERO), + gen3_fs_operand(FS_U1, Z, Y, ZERO, ZERO), + 
gen3_fs_operand_zero()); + /* out.x = -B + sqrt (B² - 4*A*C), */ + gen3_fs_rsq(out, MASK_X, gen3_fs_operand(FS_U1, X, X, X, X)); + gen3_fs_mad(out, MASK_X, + gen3_fs_operand(out, X, ZERO, ZERO, ZERO), + gen3_fs_operand(FS_U1, X, ZERO, ZERO, ZERO), + gen3_fs_operand(FS_U0, NEG_W, ZERO, ZERO, ZERO)); + /* out.x = (-B + sqrt (B² - 4*A*C)) / (2 * A), */ + gen3_fs_mul(out, + gen3_fs_operand(out, X, ZERO, ZERO, ZERO), + gen3_fs_operand(c+1, W, ZERO, ZERO, ZERO)); + break; + } +} + +static void +gen3_composite_emit_shader(struct sna *sna, + const struct sna_composite_op *op, + uint8_t blend) +{ + Bool dst_is_alpha = PIXMAN_FORMAT_RGB(op->dst.format) == 0; + const struct sna_composite_channel *src, *mask; + struct gen3_render_state *state = &sna->render_state.gen3; + uint32_t shader_offset, id; + int src_reg, mask_reg; + int t, length; + + src = &op->src; + mask = &op->mask; + if (mask->gen3.type == SHADER_NONE) + mask = NULL; + + if (mask && src->is_opaque && + gen3_blend_op[blend].src_alpha && + op->has_component_alpha) { + src = mask; + mask = NULL; + } + + id = (src->gen3.type | + src->is_affine << 4 | + src->alpha_fixup << 5 | + src->rb_reversed << 6); + if (mask) { + id |= (mask->gen3.type << 8 | + mask->is_affine << 12 | + gen3_blend_op[blend].src_alpha << 13 | + op->has_component_alpha << 14 | + mask->alpha_fixup << 15 | + mask->rb_reversed << 16); + } + id |= dst_is_alpha << 24; + id |= op->rb_reversed << 25; + + if (id == state->last_shader) + return; + + state->last_shader = id; + + shader_offset = sna->kgem.nbatch++; + t = 0; + switch (src->gen3.type) { + case SHADER_NONE: + case SHADER_OPACITY: + assert(0); + case SHADER_ZERO: + break; + case SHADER_CONSTANT: + gen3_fs_dcl(FS_T8); + src_reg = FS_T8; + break; + case SHADER_TEXTURE: + case SHADER_RADIAL: + case SHADER_LINEAR: + gen3_fs_dcl(FS_S0); + gen3_fs_dcl(FS_T0); + t++; + break; + } + + if (mask == NULL) { + if (src->gen3.type == SHADER_ZERO) { + gen3_fs_mov(FS_OC, gen3_fs_operand_zero()); + goto done; + } 
+ if (src->alpha_fixup && dst_is_alpha) { + gen3_fs_mov(FS_OC, gen3_fs_operand_one()); + goto done; + } + /* No mask, so load directly to output color */ + if (src->gen3.type != SHADER_CONSTANT) { + if (dst_is_alpha || src->rb_reversed ^ op->rb_reversed) + src_reg = FS_R0; + else + src_reg = FS_OC; + } + switch (src->gen3.type) { + case SHADER_LINEAR: + gen3_linear_coord(sna, src, FS_T0, FS_R0); + gen3_fs_texld(src_reg, FS_S0, FS_R0); + break; + + case SHADER_RADIAL: + gen3_radial_coord(sna, src, FS_T0, FS_R0); + gen3_fs_texld(src_reg, FS_S0, FS_R0); + break; + + case SHADER_TEXTURE: + if (src->is_affine) + gen3_fs_texld(src_reg, FS_S0, FS_T0); + else + gen3_fs_texldp(src_reg, FS_S0, FS_T0); + break; + + case SHADER_NONE: + case SHADER_CONSTANT: + case SHADER_ZERO: + break; + } + + if (src_reg != FS_OC) { + if (src->alpha_fixup) + gen3_fs_mov(FS_OC, + src->rb_reversed ^ op->rb_reversed ? + gen3_fs_operand(src_reg, Z, Y, X, ONE) : + gen3_fs_operand(src_reg, X, Y, Z, ONE)); + else if (dst_is_alpha) + gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, W, W, W, W)); + else if (src->rb_reversed ^ op->rb_reversed) + gen3_fs_mov(FS_OC, gen3_fs_operand(src_reg, Z, Y, X, W)); + else + gen3_fs_mov(FS_OC, gen3_fs_operand_reg(src_reg)); + } else if (src->alpha_fixup) + gen3_fs_mov_masked(FS_OC, MASK_W, gen3_fs_operand_one()); + } else { + int out_reg = FS_OC; + if (op->rb_reversed) + out_reg = FS_U0; + + switch (mask->gen3.type) { + case SHADER_CONSTANT: + gen3_fs_dcl(FS_T9); + mask_reg = FS_T9; + break; + case SHADER_TEXTURE: + case SHADER_LINEAR: + case SHADER_RADIAL: + gen3_fs_dcl(FS_S0 + t); + case SHADER_OPACITY: + gen3_fs_dcl(FS_T0 + t); + break; + case SHADER_NONE: + case SHADER_ZERO: + assert(0); + break; + } + + t = 0; + switch (src->gen3.type) { + case SHADER_LINEAR: + gen3_linear_coord(sna, src, FS_T0, FS_R0); + gen3_fs_texld(FS_R0, FS_S0, FS_R0); + src_reg = FS_R0; + t++; + break; + + case SHADER_RADIAL: + gen3_radial_coord(sna, src, FS_T0, FS_R0); + gen3_fs_texld(FS_R0, 
FS_S0, FS_R0); + src_reg = FS_R0; + t++; + break; + + case SHADER_TEXTURE: + if (src->is_affine) + gen3_fs_texld(FS_R0, FS_S0, FS_T0); + else + gen3_fs_texldp(FS_R0, FS_S0, FS_T0); + src_reg = FS_R0; + t++; + break; + + case SHADER_CONSTANT: + case SHADER_NONE: + case SHADER_ZERO: + break; + } + if (src->alpha_fixup) + gen3_fs_mov_masked(src_reg, MASK_W, gen3_fs_operand_one()); + if (src->rb_reversed) + gen3_fs_mov(src_reg, gen3_fs_operand(src_reg, Z, Y, X, W)); + + switch (mask->gen3.type) { + case SHADER_LINEAR: + gen3_linear_coord(sna, mask, FS_T0 + t, FS_R1); + gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1); + mask_reg = FS_R1; + break; + + case SHADER_RADIAL: + gen3_radial_coord(sna, mask, FS_T0 + t, FS_R1); + gen3_fs_texld(FS_R1, FS_S0 + t, FS_R1); + mask_reg = FS_R1; + break; + + case SHADER_TEXTURE: + if (mask->is_affine) + gen3_fs_texld(FS_R1, FS_S0 + t, FS_T0 + t); + else + gen3_fs_texldp(FS_R1, FS_S0 + t, FS_T0 + t); + mask_reg = FS_R1; + break; + + case SHADER_OPACITY: + if (dst_is_alpha) { + gen3_fs_mul(out_reg, + gen3_fs_operand(src_reg, W, W, W, W), + gen3_fs_operand(FS_T0 + t, X, X, X, X)); + } else { + gen3_fs_mul(out_reg, + gen3_fs_operand(src_reg, X, Y, Z, W), + gen3_fs_operand(FS_T0 + t, X, X, X, X)); + } + goto mask_done; + + case SHADER_CONSTANT: + case SHADER_NONE: + case SHADER_ZERO: + break; + } + if (mask->alpha_fixup) + gen3_fs_mov_masked(mask_reg, MASK_W, gen3_fs_operand_one()); + if (mask->rb_reversed) + gen3_fs_mov(mask_reg, gen3_fs_operand(mask_reg, Z, Y, X, W)); + + if (dst_is_alpha) { + gen3_fs_mul(out_reg, + gen3_fs_operand(src_reg, W, W, W, W), + gen3_fs_operand(mask_reg, W, W, W, W)); + } else { + /* If component alpha is active in the mask and the blend + * operation uses the source alpha, then we know we don't + * need the source value (otherwise we would have hit a + * fallback earlier), so we provide the source alpha (src.A * + * mask.X) as output color. 
+ * Conversely, if CA is set and we don't need the source alpha, + * then we produce the source value (src.X * mask.X) and the + * source alpha is unused. Otherwise, we provide the non-CA + * source value (src.X * mask.A). + */ + if (op->has_component_alpha) { + if (gen3_blend_op[blend].src_alpha) + gen3_fs_mul(out_reg, + gen3_fs_operand(src_reg, W, W, W, W), + gen3_fs_operand_reg(mask_reg)); + else + gen3_fs_mul(out_reg, + gen3_fs_operand_reg(src_reg), + gen3_fs_operand_reg(mask_reg)); + } else { + gen3_fs_mul(out_reg, + gen3_fs_operand_reg(src_reg), + gen3_fs_operand(mask_reg, W, W, W, W)); + } + } +mask_done: + if (op->rb_reversed) + gen3_fs_mov(FS_OC, gen3_fs_operand(FS_U0, Z, Y, X, W)); + } + +done: + length = sna->kgem.nbatch - shader_offset; + sna->kgem.batch[shader_offset] = + _3DSTATE_PIXEL_SHADER_PROGRAM | (length - 2); +} + +static uint32_t gen3_ms_tiling(uint32_t tiling) +{ + uint32_t v = 0; + switch (tiling) { + case I915_TILING_Y: v |= MS3_TILE_WALK; + case I915_TILING_X: v |= MS3_TILED_SURFACE; + case I915_TILING_NONE: break; + } + return v; +} + +static void gen3_emit_invariant(struct sna *sna) +{ + /* Disable independent alpha blend */ + OUT_BATCH(_3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD | IAB_MODIFY_ENABLE | + IAB_MODIFY_FUNC | BLENDFUNC_ADD << IAB_FUNC_SHIFT | + IAB_MODIFY_SRC_FACTOR | BLENDFACT_ONE << IAB_SRC_FACTOR_SHIFT | + IAB_MODIFY_DST_FACTOR | BLENDFACT_ZERO << IAB_DST_FACTOR_SHIFT); + + OUT_BATCH(_3DSTATE_COORD_SET_BINDINGS | + CSB_TCB(0, 0) | + CSB_TCB(1, 1) | + CSB_TCB(2, 2) | + CSB_TCB(3, 3) | + CSB_TCB(4, 4) | + CSB_TCB(5, 5) | + CSB_TCB(6, 6) | + CSB_TCB(7, 7)); + + OUT_BATCH(_3DSTATE_MODES_4_CMD | + ENABLE_LOGIC_OP_FUNC | + LOGIC_OP_FUNC(LOGICOP_COPY)); + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(3) | I1_LOAD_S(4) | I1_LOAD_S(5) | 2); + OUT_BATCH(0x00000000); /* Disable texture coordinate wrap-shortest */ + OUT_BATCH((1 << S4_POINT_WIDTH_SHIFT) | + S4_LINE_WIDTH_ONE | + S4_CULLMODE_NONE | + S4_VFMT_XY); + 
	OUT_BATCH(0x00000000); /* Stencil. */

	OUT_BATCH(_3DSTATE_SCISSOR_ENABLE_CMD | DISABLE_SCISSOR_RECT);
	OUT_BATCH(_3DSTATE_DEPTH_SUBRECT_DISABLE);

	OUT_BATCH(_3DSTATE_LOAD_INDIRECT);
	OUT_BATCH(0x00000000);

	OUT_BATCH(_3DSTATE_STIPPLE);
	OUT_BATCH(0x00000000);

	sna->render_state.gen3.need_invariant = FALSE;
}

/* Prepare the batch for a composite: make sure there is room for the
 * state + primitive emission (flushing if necessary) and (re)emit the
 * per-batch invariant state after any flush. */
static void
gen3_get_batch(struct sna *sna,
	       const struct sna_composite_op *op)
{
#define MAX_OBJECTS 3 /* worst case: dst + src + mask */

	kgem_set_mode(&sna->kgem, KGEM_RENDER);

	/* 200 dwords covers the worst-case state emission below;
	 * NOTE(review): kept in sync with gen3_emit_composite_state by
	 * convention only — confirm if that function grows. */
	if (!kgem_check_batch(&sna->kgem, 200)) {
		DBG(("%s: flushing batch: size %d > %d\n",
		     __FUNCTION__, 200,
		     sna->kgem.surface-sna->kgem.nbatch));
		kgem_submit(&sna->kgem);
	}

	if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS) {
		DBG(("%s: flushing batch: reloc %d >= %d\n",
		     __FUNCTION__,
		     sna->kgem.nreloc,
		     (int)KGEM_RELOC_SIZE(&sna->kgem) - MAX_OBJECTS));
		kgem_submit(&sna->kgem);
	}

	if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1) {
		DBG(("%s: flushing batch: exec %d >= %d\n",
		     __FUNCTION__,
		     sna->kgem.nexec,
		     (int)KGEM_EXEC_SIZE(&sna->kgem) - MAX_OBJECTS - 1));
		kgem_submit(&sna->kgem);
	}

	if (sna->render_state.gen3.need_invariant)
		gen3_emit_invariant(sna);
#undef MAX_OBJECTS
}

/* Emit (or re-emit) all hardware state needed by the composite op,
 * deduplicating against the cached last-emitted state where possible
 * to keep the batch small. */
static void gen3_emit_composite_state(struct sna *sna,
				      const struct sna_composite_op *op)
{
	struct gen3_render_state *state = &sna->render_state.gen3;
	uint32_t map[4];
	uint32_t sampler[4];
	struct kgem_bo *bo[2];
	int tex_count, n;
	uint32_t ss2;

	gen3_get_batch(sna, op);

	/* BUF_INFO is an implicit flush, so skip if the target is unchanged.
	 */
	if (op->dst.bo->unique_id != state->current_dst) {
		uint32_t v;

		OUT_BATCH(_3DSTATE_BUF_INFO_CMD);
		OUT_BATCH(BUF_3D_ID_COLOR_BACK |
			  gen3_buf_tiling(op->dst.bo->tiling) |
			  op->dst.bo->pitch);
		OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch,
					 op->dst.bo,
					 I915_GEM_DOMAIN_RENDER << 16 |
					 I915_GEM_DOMAIN_RENDER,
					 0));

		OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD);
		OUT_BATCH(gen3_get_dst_format(op->dst.format));

		v = (DRAW_YMAX(op->dst.height - 1) |
		     DRAW_XMAX(op->dst.width - 1));
		if (v != state->last_drawrect_limit) {
			OUT_BATCH(_3DSTATE_DRAW_RECT_CMD);
			OUT_BATCH(0);
			OUT_BATCH(0);
			OUT_BATCH(v);
			OUT_BATCH(0);
			state->last_drawrect_limit = v;
		}

		state->current_dst = op->dst.bo->unique_id;
	}
	kgem_bo_mark_dirty(op->dst.bo);

	/* Gather texture map/sampler state for src and mask; constants and
	 * gradients are handled inline. */
	ss2 = ~0;
	tex_count = 0;
	switch (op->src.gen3.type) {
	case SHADER_OPACITY:
	case SHADER_NONE:
		assert(0);
		/* fallthrough (after assert) */
	case SHADER_ZERO:
		break;
	case SHADER_CONSTANT:
		if (op->src.gen3.mode != state->last_diffuse) {
			OUT_BATCH(_3DSTATE_DFLT_DIFFUSE_CMD);
			OUT_BATCH(op->src.gen3.mode);
			state->last_diffuse = op->src.gen3.mode;
		}
		break;
	case SHADER_LINEAR:
	case SHADER_RADIAL:
	case SHADER_TEXTURE:
		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
		ss2 |= S2_TEXCOORD_FMT(tex_count,
				       op->src.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
		map[tex_count * 2 + 0] =
			op->src.card_format |
			gen3_ms_tiling(op->src.bo->tiling) |
			(op->src.height - 1) << MS3_HEIGHT_SHIFT |
			(op->src.width - 1) << MS3_WIDTH_SHIFT;
		map[tex_count * 2 + 1] =
			(op->src.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;

		sampler[tex_count * 2 + 0] = op->src.filter;
		sampler[tex_count * 2 + 1] =
			op->src.repeat |
			tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
		bo[tex_count] = op->src.bo;
		tex_count++;
		break;
	}
	switch (op->mask.gen3.type) {
	case SHADER_NONE:
	case SHADER_ZERO:
		break;
	case SHADER_CONSTANT:
		if (op->mask.gen3.mode != state->last_specular) {
			OUT_BATCH(_3DSTATE_DFLT_SPEC_CMD);
			OUT_BATCH(op->mask.gen3.mode);
			state->last_specular = op->mask.gen3.mode;
		}
		break;
	case SHADER_LINEAR:
	case SHADER_RADIAL:
	case SHADER_TEXTURE:
		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
		ss2 |= S2_TEXCOORD_FMT(tex_count,
				       op->mask.is_affine ? TEXCOORDFMT_2D : TEXCOORDFMT_4D);
		map[tex_count * 2 + 0] =
			op->mask.card_format |
			gen3_ms_tiling(op->mask.bo->tiling) |
			(op->mask.height - 1) << MS3_HEIGHT_SHIFT |
			(op->mask.width - 1) << MS3_WIDTH_SHIFT;
		map[tex_count * 2 + 1] =
			(op->mask.bo->pitch / 4 - 1) << MS4_PITCH_SHIFT;

		sampler[tex_count * 2 + 0] = op->mask.filter;
		sampler[tex_count * 2 + 1] =
			op->mask.repeat |
			tex_count << SS3_TEXTUREMAP_INDEX_SHIFT;
		bo[tex_count] = op->mask.bo;
		tex_count++;
		break;
	case SHADER_OPACITY:
		/* Opacity needs only a 1D texcoord, no map/sampler slot. */
		ss2 &= ~S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_NOT_PRESENT);
		ss2 |= S2_TEXCOORD_FMT(tex_count, TEXCOORDFMT_1D);
		break;
	}

	{
		/* Emit S2/S6 then rewind if they match the last emission. */
		uint32_t blend_offset = sna->kgem.nbatch;

		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(2) | I1_LOAD_S(6) | 1);
		OUT_BATCH(ss2);
		OUT_BATCH(gen3_get_blend_cntl(op->op,
					      op->has_component_alpha,
					      op->dst.format));

		if (memcmp(sna->kgem.batch + state->last_blend + 1,
			   sna->kgem.batch + blend_offset + 1,
			   2 * 4) == 0)
			sna->kgem.nbatch = blend_offset;
		else
			state->last_blend = blend_offset;
	}

	if (op->u.gen3.num_constants) {
		int count = op->u.gen3.num_constants;
		/* Skip if the same constants were last loaded. */
		if (state->last_constants) {
			int last = sna->kgem.batch[state->last_constants+1];
			if (last == (1 << (count >> 2)) - 1 &&
			    memcmp(&sna->kgem.batch[state->last_constants+2],
				   op->u.gen3.constants,
				   count * sizeof(uint32_t)) == 0)
				count = 0;
		}
		if (count) {
			state->last_constants = sna->kgem.nbatch;
			OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | count);
			OUT_BATCH((1 << (count >> 2)) - 1);

			memcpy(sna->kgem.batch + sna->kgem.nbatch,
			       op->u.gen3.constants,
			       count * sizeof(uint32_t));
			sna->kgem.nbatch += count;
		}
	}

	if (tex_count != 0) {
		uint32_t rewind;

		/* Only re-emit map state if any texture binding changed. */
		n = 0;
		if (tex_count == state->tex_count) {
			for (; n < tex_count; n++) {
				if (map[2*n+0] != state->tex_map[2*n+0] ||
				    map[2*n+1] != state->tex_map[2*n+1] ||
				    state->tex_handle[n] != bo[n]->handle ||
				    state->tex_delta[n] != bo[n]->delta)
					break;
			}
		}
		if (n < tex_count) {
			OUT_BATCH(_3DSTATE_MAP_STATE | (3 * tex_count));
			OUT_BATCH((1 << tex_count) - 1);
			for (n = 0; n < tex_count; n++) {
				OUT_BATCH(kgem_add_reloc(&sna->kgem,
							 sna->kgem.nbatch,
							 bo[n],
							 I915_GEM_DOMAIN_SAMPLER<< 16,
							 0));
				OUT_BATCH(map[2*n + 0]);
				OUT_BATCH(map[2*n + 1]);

				state->tex_map[2*n+0] = map[2*n+0];
				state->tex_map[2*n+1] = map[2*n+1];
				state->tex_handle[n] = bo[n]->handle;
				state->tex_delta[n] = bo[n]->delta;
			}
			state->tex_count = n;
		}

		/* Sampler state: emit then rewind if identical to last. */
		rewind = sna->kgem.nbatch;
		OUT_BATCH(_3DSTATE_SAMPLER_STATE | (3 * tex_count));
		OUT_BATCH((1 << tex_count) - 1);
		for (n = 0; n < tex_count; n++) {
			OUT_BATCH(sampler[2*n + 0]);
			OUT_BATCH(sampler[2*n + 1]);
			OUT_BATCH(0);
		}
		if (state->last_sampler &&
		    memcmp(&sna->kgem.batch[state->last_sampler+1],
			   &sna->kgem.batch[rewind + 1],
			   (3*tex_count + 1)*sizeof(uint32_t)) == 0)
			sna->kgem.nbatch = rewind;
		else
			state->last_sampler = rewind;
	}

	gen3_composite_emit_shader(sna, op, op->op);
}

static void
gen3_magic_ca_pass(struct sna *sna,
		   const struct sna_composite_op *op)
{
	/* Second pass for component-alpha: after the OutReverse pass has
	 * knocked out the destination, replay the same rectangles with an
	 * Add blend to accumulate the source contribution. */
	if (!op->need_magic_ca_pass)
		return;

	DBG(("%s(%d)\n", __FUNCTION__,
	     sna->render.vertex_index - sna->render.vertex_start));

	OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | I1_LOAD_S(6) | 0);
	OUT_BATCH(gen3_get_blend_cntl(PictOpAdd, TRUE, op->dst.format));
	gen3_composite_emit_shader(sna, op, PictOpAdd);

	OUT_BATCH(PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
		  (sna->render.vertex_index - sna->render.vertex_start));
	OUT_BATCH(sna->render.vertex_start);
}

/* Patch the pending PRIM3D packet with the final vertex count and run
 * the magic CA pass if one is required. */
static void gen3_vertex_flush(struct sna *sna)
{
	if (sna->render_state.gen3.vertex_offset == 0 ||
	    sna->render.vertex_index == sna->render.vertex_start)
		return;

	DBG(("%s[%x] = %d\n", __FUNCTION__,
	     4*sna->render_state.gen3.vertex_offset,
	     sna->render.vertex_index - sna->render.vertex_start));

	sna->kgem.batch[sna->render_state.gen3.vertex_offset] =
		PRIM3D_RECTLIST | PRIM3D_INDIRECT_SEQUENTIAL |
		(sna->render.vertex_index - sna->render.vertex_start);
	sna->kgem.batch[sna->render_state.gen3.vertex_offset + 1] =
		sna->render.vertex_start;

	if (sna->render.op)
		gen3_magic_ca_pass(sna, sna->render.op);

	sna->render_state.gen3.vertex_offset = 0;
}

/* Upload the accumulated vertex data: inline into the batch when it is
 * the final use and fits, otherwise via a fresh linear vbo, and patch
 * the recorded S0 relocation to point at it. */
static void gen3_vertex_finish(struct sna *sna, Bool last)
{
	struct kgem_bo *bo;
	int delta;

	DBG(("%s: last? %d\n", __FUNCTION__, last));

	gen3_vertex_flush(sna);
	if (!sna->render.vertex_used)
		return;

	if (last && sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) {
		DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__,
		     sna->render.vertex_used, sna->kgem.nbatch));
		memcpy(sna->kgem.batch + sna->kgem.nbatch,
		       sna->render.vertex_data,
		       sna->render.vertex_used * 4);
		delta = sna->kgem.nbatch * 4;
		bo = NULL;
		sna->kgem.nbatch += sna->render.vertex_used;
	} else {
		bo = kgem_create_linear(&sna->kgem, 4*sna->render.vertex_used);
		if (bo && !kgem_bo_write(&sna->kgem, bo,
					 sna->render.vertex_data,
					 4*sna->render.vertex_used)) {
			kgem_bo_destroy(&sna->kgem, bo);
			return;
		}
		delta = 0;
		DBG(("%s: new vbo: %d\n", __FUNCTION__,
		     sna->render.vertex_used));
	}

	DBG(("%s: reloc = %d\n", __FUNCTION__,
	     sna->render.vertex_reloc[0]));

	sna->kgem.batch[sna->render.vertex_reloc[0]] =
		kgem_add_reloc(&sna->kgem,
			       sna->render.vertex_reloc[0],
			       bo,
			       I915_GEM_DOMAIN_VERTEX << 16,
			       delta);
	sna->render.vertex_reloc[0] = 0;
	sna->render.vertex_used = 0;
	sna->render.vertex_index = 0;

	if (bo)
		kgem_bo_destroy(&sna->kgem, bo);
}

/* Open a PRIM3D_RECTLIST packet (emitting S0/S1 vertex-buffer state as
 * needed); returns false if the batch must be flushed first. */
static bool gen3_rectangle_begin(struct sna *sna,
				 const struct sna_composite_op *op)
{
	int ndwords, i1_cmd = 0, i1_len = 0;
	struct gen3_render_state *state = &sna->render_state.gen3;

	ndwords = 0;
	if (state->vertex_offset == 0) {
		ndwords += 2;
		if (op->need_magic_ca_pass)
			ndwords += 100;
	}
	if (sna->render.vertex_reloc[0] == 0)
		i1_len++, i1_cmd |= I1_LOAD_S(0), ndwords++;
	if (state->floats_per_vertex != op->floats_per_vertex)
		i1_len++, i1_cmd |= I1_LOAD_S(1), ndwords++;
	if (ndwords == 0)
		return true;

	if (!kgem_check_batch(&sna->kgem, ndwords+1))
		return false;

	if (i1_cmd) {
		OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | i1_cmd | (i1_len - 1));
		if (sna->render.vertex_reloc[0] == 0)
			/* reserve the S0 slot; patched in vertex_finish */
			sna->render.vertex_reloc[0] = sna->kgem.nbatch++;
		if (state->floats_per_vertex != op->floats_per_vertex) {
			state->floats_per_vertex = op->floats_per_vertex;
			OUT_BATCH(state->floats_per_vertex << S1_VERTEX_WIDTH_SHIFT |
				  state->floats_per_vertex << S1_VERTEX_PITCH_SHIFT);
		}
	}

	if (state->vertex_offset == 0) {
		/* Reuse the last primitive packet if nothing intervened. */
		if (sna->kgem.nbatch == 2 + state->last_vertex_offset) {
			state->vertex_offset = state->last_vertex_offset;
		} else {
			state->vertex_offset = sna->kgem.nbatch;
			OUT_BATCH(MI_NOOP); /* to be filled later */
			OUT_BATCH(MI_NOOP);
			sna->render.vertex_start = sna->render.vertex_index;
			state->last_vertex_offset = state->vertex_offset;
		}
	}

	return true;
}

/* Flush the vertex buffer to make room; returns the number of floats
 * now available, or 0 if the batch itself must be submitted. */
static int gen3_get_rectangles__flush(struct sna *sna, bool ca)
{
	if (!kgem_check_batch(&sna->kgem, ca ? 105: 5))
		return 0;
	if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - 2)
		return 0;
	if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - 1)
		return 0;

	gen3_vertex_finish(sna, FALSE);
	assert(sna->render.vertex_index == 0);
	assert(sna->render.vertex_used == 0);
	return ARRAY_SIZE(sna->render.vertex_data);
}

/* Reserve vertex space for up to 'want' rectangles (3 vertices each);
 * returns how many were granted, or 0 if state must be re-emitted. */
inline static int gen3_get_rectangles(struct sna *sna,
				      const struct sna_composite_op *op,
				      int want)
{
	int rem = vertex_space(sna);

	DBG(("%s: want=%d, rem=%d\n",
	     __FUNCTION__, 3*want*op->floats_per_vertex, rem));

	assert(sna->render.vertex_index * op->floats_per_vertex == sna->render.vertex_used);
	if (op->floats_per_vertex*3 > rem) {
		DBG(("flushing vbo for %s: %d < %d\n",
		     __FUNCTION__, rem, 3*op->floats_per_vertex));
		rem = gen3_get_rectangles__flush(sna, op->need_magic_ca_pass);
		if (rem == 0)
			return 0;
	}

	if (!gen3_rectangle_begin(sna, op)) {
		DBG(("%s: flushing batch\n", __FUNCTION__));
		return 0;
	}

	if (want > 1 && want * op->floats_per_vertex*3 > rem)
		want = rem / (3*op->floats_per_vertex);
	sna->render.vertex_index += 3*want;

	assert(want);
	assert(sna->render.vertex_index * op->floats_per_vertex <= ARRAY_SIZE(sna->render.vertex_data));
	return want;
}

fastcall static void
gen3_render_composite_blt(struct sna *sna,
			  const struct sna_composite_op *op,
			  const struct sna_composite_rectangles *r)
{
	/* Emit one composite rectangle, re-emitting state if the batch
	 * had to be flushed to make room. */
	DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", __FUNCTION__,
	     r->src.x, r->src.y, op->src.offset[0], op->src.offset[1],
	     r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1],
	     r->dst.x, r->dst.y, op->dst.x, op->dst.y,
	     r->width, r->height));

	if (!gen3_get_rectangles(sna, op, 1)) {
		gen3_emit_composite_state(sna, op);
		gen3_get_rectangles(sna, op, 1);
	}

	op->prim_emit(sna, op, r);
}

/* Emit a batch of composite boxes, chunked by available vertex space. */
static void
gen3_render_composite_boxes(struct sna *sna,
			    const struct sna_composite_op *op,
			    const BoxRec *box, int nbox)
{
	DBG(("%s: nbox=%d, src=+(%d, %d), mask=+(%d, %d), dst=+(%d, %d)\n",
	     __FUNCTION__, nbox,
	     op->src.offset[0], op->src.offset[1],
	     op->mask.offset[0], op->mask.offset[1],
	     op->dst.x, op->dst.y));

	do {
		int nbox_this_time;

		nbox_this_time = gen3_get_rectangles(sna, op, nbox);
		if (nbox_this_time == 0) {
			gen3_emit_composite_state(sna, op);
			nbox_this_time = gen3_get_rectangles(sna, op, nbox);
		}
		nbox -= nbox_this_time;

		do {
			struct sna_composite_rectangles r;

			DBG(("  %s: (%d, %d) x (%d, %d)\n", __FUNCTION__,
			     box->x1, box->y1,
			     box->x2 - box->x1,
			     box->y2 - box->y1));

			r.dst.x = box->x1; r.dst.y = box->y1;
			r.width = box->x2 - box->x1;
			r.height = box->y2 - box->y1;
			r.src = r.mask = r.dst;

			op->prim_emit(sna, op, &r);
			box++;
		} while (--nbox_this_time);
	} while (nbox);
}

/* Finish a composite: flush vertices, release channel bos and any
 * redirection, and drop back to the default kgem mode. */
static void
gen3_render_composite_done(struct sna *sna,
			   const struct sna_composite_op *op)
{
	assert(sna->render.op == op);

	gen3_vertex_flush(sna);
	sna->render.op = NULL;
	_kgem_set_mode(&sna->kgem, KGEM_RENDER);

	DBG(("%s()\n", __FUNCTION__));

	sna_render_composite_redirect_done(sna, op);

	if (op->src.bo)
		kgem_bo_destroy(&sna->kgem, op->src.bo);
	if (op->mask.bo)
		kgem_bo_destroy(&sna->kgem, op->mask.bo);
}

/* Reset all cached gen3 state, e.g. after a batch submission. */
static void
gen3_render_reset(struct sna *sna)
{
	struct gen3_render_state *state = &sna->render_state.gen3;

	state->need_invariant = TRUE;
	state->current_dst = 0;
	state->tex_count = 0;
	state->last_drawrect_limit = ~0U;
	state->last_target = 0;
	state->last_blend = 0;
	state->last_constants = 0;
	state->last_sampler = 0;
	state->last_shader = 0;
	/* impossible sentinel colours so the first emission never matches */
	state->last_diffuse = 0xcc00ffee;
	state->last_specular = 0xcc00ffee;

	state->floats_per_vertex = 0;
	state->last_floats_per_vertex = 0;
	state->last_vertex_offset = 0;
	state->vertex_offset = 0;

	assert(sna->render.vertex_used == 0);
	assert(sna->render.vertex_index == 0);
	assert(sna->render.vertex_reloc[0] == 0);
}

/* Look up the hardware texel format for a Render picture format. */
static Bool gen3_composite_channel_set_format(struct sna_composite_channel *channel,
					      CARD32 format)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
		if (gen3_tex_formats[i].fmt == format) {
			channel->card_format = gen3_tex_formats[i].card_fmt;
			channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
			return TRUE;
		}
	}
	return FALSE;
}

/* Does sampling the (w x h) region at (x, y) stay within the drawable
 * (or repeat), so no out-of-bounds texels can be fetched? */
static Bool source_is_covered(PicturePtr picture,
			      int x, int y,
			      int width, int height)
{
	int x1, y1, x2, y2;

	if (picture->repeat && picture->repeatType != RepeatNone)
		return TRUE;

	if (picture->pDrawable == NULL)
		return FALSE;

	if (picture->transform) {
		pixman_box16_t sample;

		sample.x1 = x;
		sample.y1 = y;
		sample.x2 = x + width;
		sample.y2 = y + height;

		pixman_transform_bounds(picture->transform, &sample);

		x1 = sample.x1;
		x2 = sample.x2;
		y1 = sample.y1;
		y2 = sample.y2;
	} else {
		x1 = x;
		y1 = y;
		x2 = x + width;
		y2 = y + height;
	}

	return
		x1 >= 0 && y1 >= 0 &&
		x2 <= picture->pDrawable->width &&
		y2 <= picture->pDrawable->height;
}

/* Fallback format lookup: treat an alpha-less x-format as its a-format
 * sibling with an alpha fixup in the shader, provided sampling never
 * strays outside the drawable (where the fixup would be wrong). */
static Bool gen3_composite_channel_set_xformat(PicturePtr picture,
					       struct sna_composite_channel *channel,
					       int x, int y,
					       int width, int height)
{
	int i;

	if (PICT_FORMAT_A(picture->format) != 0)
		return FALSE;

	if (width == 0 || height == 0)
		return FALSE;

	if (!source_is_covered(picture, x, y, width, height))
		return FALSE;

	for (i = 0; i < ARRAY_SIZE(gen3_tex_formats); i++) {
		if (gen3_tex_formats[i].xfmt == picture->format) {
			channel->card_format = gen3_tex_formats[i].card_fmt;
			channel->rb_reversed = gen3_tex_formats[i].rb_reversed;
			channel->alpha_fixup = true;
			return TRUE;
		}
	}

	return FALSE;
}

/* Initialise a channel as a solid colour (SHADER_CONSTANT/ZERO). */
static int
gen3_init_solid(struct sna *sna,
		struct sna_composite_channel *channel,
		uint32_t color)
{
	channel->gen3.mode = color;
	channel->gen3.type = SHADER_CONSTANT;
	if (color == 0)
		channel->gen3.type = SHADER_ZERO;
	if ((color & 0xff000000) == 0xff000000)
		channel->is_opaque = true;

	/* for consistency */
	channel->repeat = RepeatNormal;
	channel->filter = PictFilterNearest;
	channel->pict_format = PICT_a8r8g8b8;
	channel->card_format = MAPSURF_32BIT | MT_32BIT_ARGB8888;

	return 1;
}

/* Convert the generic repeat/filter/format values into gen3 encodings. */
static void gen3_composite_channel_convert(struct sna_composite_channel *channel)
{
	if (channel->gen3.type == SHADER_TEXTURE)
		channel->repeat = gen3_texture_repeat(channel->repeat);
	else
		channel->repeat = gen3_gradient_repeat(channel->repeat);

	channel->filter = gen3_filter(channel->filter);
	if (channel->card_format == 0)
		gen3_composite_channel_set_format(channel, channel->pict_format);
}

/* Common setup for linear/radial gradients: render the colour ramp to a
 * 1D texture and configure the channel to sample it. */
static Bool gen3_gradient_setup(struct sna *sna,
				PicturePtr picture,
				struct sna_composite_channel *channel,
				int16_t ox, int16_t oy)
{
	int16_t dx, dy;

	if (picture->repeat == 0) {
		channel->repeat = RepeatNone;
	} else switch (picture->repeatType) {
	case RepeatNone:
	case RepeatNormal:
	case RepeatPad:
	case RepeatReflect:
		channel->repeat = picture->repeatType;
		break;
	default:
		return FALSE;
	}

	channel->bo =
		sna_render_get_gradient(sna,
					(PictGradient *)picture->pSourcePict);
	if (channel->bo == NULL)
		return FALSE;

	channel->pict_format = PICT_a8r8g8b8;
	channel->card_format = MAPSURF_32BIT |
MT_32BIT_ARGB8888;
	channel->filter = PictFilterBilinear;
	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		ox += dx;
		oy += dy;
		channel->transform = NULL;
	} else
		channel->transform = picture->transform;
	channel->width = channel->bo->pitch / 4;
	channel->height = 1;
	channel->offset[0] = ox;
	channel->offset[1] = oy;
	channel->scale[0] = channel->scale[1] = 1;
	return TRUE;
}

/* Set up a linear gradient: load the projection coefficients as shader
 * constants and defer the ramp texture to gen3_gradient_setup.
 * Returns 1 on success, 0 to fall back. */
static int
gen3_init_linear(struct sna *sna,
		 PicturePtr picture,
		 struct sna_composite_op *op,
		 struct sna_composite_channel *channel,
		 int ox, int oy)
{
	PictLinearGradient *linear =
		(PictLinearGradient *)picture->pSourcePict;
	float x0, y0, sf;
	float dx, dy, offset;
	int n;

	DBG(("%s: p1=(%f, %f), p2=(%f, %f)\n",
	     __FUNCTION__,
	     xFixedToDouble(linear->p1.x), xFixedToDouble(linear->p1.y),
	     xFixedToDouble(linear->p2.x), xFixedToDouble(linear->p2.y)));

	if (linear->p2.x == linear->p1.x && linear->p2.y == linear->p1.y)
		return 0;

	/* Normalise (dx, dy) so that dot(p - p1, d) maps p2 to 1. */
	dx = xFixedToDouble(linear->p2.x - linear->p1.x);
	dy = xFixedToDouble(linear->p2.y - linear->p1.y);
	sf = dx*dx + dy*dy;
	dx /= sf;
	dy /= sf;

	x0 = xFixedToDouble(linear->p1.x);
	y0 = xFixedToDouble(linear->p1.y);
	offset = dx*x0 + dy*y0;

	n = op->u.gen3.num_constants;
	channel->gen3.constants = FS_C0 + n / 4;
	op->u.gen3.constants[n++] = dx;
	op->u.gen3.constants[n++] = dy;
	op->u.gen3.constants[n++] = -offset;
	op->u.gen3.constants[n++] = 0;

	if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
		return 0;

	channel->gen3.type = SHADER_LINEAR;
	op->u.gen3.num_constants = n;

	DBG(("%s: dx=%f, dy=%f, offset=%f, constants=%d\n",
	     __FUNCTION__, dx, dy, -offset, channel->gen3.constants - FS_C0));
	return 1;
}

/* Set up a radial gradient, choosing the one-circle or two-circle
 * shader variant and loading its constants.
 * Returns 1 on success, 0 to fall back. */
static int
gen3_init_radial(struct sna *sna,
		 PicturePtr picture,
		 struct sna_composite_op *op,
		 struct sna_composite_channel *channel,
		 int ox, int oy)
{
	PictRadialGradient *radial = (PictRadialGradient *)picture->pSourcePict;
	double dx, dy, dr, r1;
	int n;

	dx = xFixedToDouble(radial->c2.x - radial->c1.x);
	dy = xFixedToDouble(radial->c2.y - radial->c1.y);
	dr = xFixedToDouble(radial->c2.radius - radial->c1.radius);

	r1 = xFixedToDouble(radial->c1.radius);

	n = op->u.gen3.num_constants;
	channel->gen3.constants = FS_C0 + n / 4;
	if (radial->c2.x == radial->c1.x && radial->c2.y == radial->c1.y) {
		/* Concentric circles: degenerate unless the radii differ. */
		if (radial->c2.radius == radial->c1.radius)
			return 0;

		op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.x) / dr;
		op->u.gen3.constants[n++] = xFixedToDouble(radial->c1.y) / dr;
		op->u.gen3.constants[n++] = 1. / dr;
		op->u.gen3.constants[n++] = -r1 / dr;

		channel->gen3.mode = RADIAL_ONE;
	} else {
		op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.x);
		op->u.gen3.constants[n++] = -xFixedToDouble(radial->c1.y);
		op->u.gen3.constants[n++] = r1;
		op->u.gen3.constants[n++] = -4 * (dx*dx + dy*dy - dr*dr);

		op->u.gen3.constants[n++] = -2 * dx;
		op->u.gen3.constants[n++] = -2 * dy;
		op->u.gen3.constants[n++] = -2 * r1 * dr;
		op->u.gen3.constants[n++] = 1 / (2 * (dx*dx + dy*dy - dr*dr));

		channel->gen3.mode = RADIAL_TWO;
	}

	if (!gen3_gradient_setup(sna, picture, channel, ox, oy))
		return 0;

	channel->gen3.type = SHADER_RADIAL;
	op->u.gen3.num_constants = n;
	return 1;
}

/* Bind a Render picture to a composite channel: solids and gradients
 * are handled inline, textures are routed through the fixup/convert/
 * extract helpers as required.
 * Returns 1 on success, 0 on degenerate input, -1 on hard failure
 * (matching the switch in gen3_render_composite). */
static Bool
gen3_composite_picture(struct sna *sna,
		       PicturePtr picture,
		       struct sna_composite_op *op,
		       struct sna_composite_channel *channel,
		       int16_t x, int16_t y,
		       int16_t w, int16_t h,
		       int16_t dst_x, int16_t dst_y)
{
	PixmapPtr pixmap;
	uint32_t color;
	int16_t dx, dy;

	DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n",
	     __FUNCTION__, x, y, w, h, dst_x, dst_y));

	channel->card_format = 0;

	if (picture->pDrawable == NULL) {
		SourcePict *source = picture->pSourcePict;
		int ret = 0;

		switch (source->type) {
		case SourcePictTypeSolidFill:
			ret = gen3_init_solid(sna, channel,
					      source->solidFill.color);
			break;

		case SourcePictTypeLinear:
			ret = gen3_init_linear(sna, picture, op, channel,
					       x - dst_x, y - dst_y);
			break;

		case SourcePictTypeRadial:
			ret = gen3_init_radial(sna, picture, op, channel,
					       x - dst_x, y - dst_y);
			break;
		}

		if (ret == 0)
			ret = sna_render_picture_fixup(sna, picture, channel,
						       x, y, w, h, dst_x, dst_y);
		return ret;
	}

	if (sna_picture_is_solid(picture, &color))
		return gen3_init_solid(sna, channel, color);

	if (!gen3_check_repeat(picture->repeat))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	if (!gen3_check_filter(picture->filter))
		return sna_render_picture_fixup(sna, picture, channel,
						x, y, w, h, dst_x, dst_y);

	channel->repeat = picture->repeat ? picture->repeatType : RepeatNone;
	channel->filter = picture->filter;
	channel->pict_format = picture->format;

	pixmap = get_drawable_pixmap(picture->pDrawable);
	get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy);

	x += dx + picture->pDrawable->x;
	y += dy + picture->pDrawable->y;

	channel->is_affine = sna_transform_is_affine(picture->transform);
	if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) {
		DBG(("%s: integer translation (%d, %d), removing\n",
		     __FUNCTION__, dx, dy));
		x += dx;
		y += dy;
		channel->transform = NULL;
		channel->filter = PictFilterNearest;
	} else
		channel->transform = picture->transform;

	if (!gen3_composite_channel_set_format(channel, picture->format) &&
	    !gen3_composite_channel_set_xformat(picture, channel, x, y, w, h))
		return sna_render_picture_convert(sna, picture, channel, pixmap,
						  x, y, w, h, dst_x, dst_y);

	/* gen3 samplers are limited to 2048x2048; extract the region. */
	if (pixmap->drawable.width > 2048 || pixmap->drawable.height > 2048)
		return sna_render_picture_extract(sna, picture, channel,
						  x, y, w, h, dst_x, dst_y);

	return sna_render_pixmap_bo(sna, channel, pixmap,
				    x, y, w, h, dst_x, dst_y);
}

/* Is this picture resident only in CPU memory (e.g. an SHM pixmap)? */
static inline Bool
picture_is_cpu(PicturePtr picture)
{
	if (!picture->pDrawable)
		return FALSE;

	/* If it is a solid, try to use the render paths */
	if (picture->pDrawable->width == 1 &&
	    picture->pDrawable->height == 1 &&
	    picture->repeat)
		return FALSE;

	return is_cpu(picture->pDrawable);
}

/* Heuristic: should this composite go to the BLT engine instead of
 * the 3D pipe? */
static Bool
try_blt(struct sna *sna,
	PicturePtr dst,
	PicturePtr source,
	int width, int height)
{
	if (sna->kgem.mode == KGEM_BLT) {
		DBG(("%s: already performing BLT\n", __FUNCTION__));
		return TRUE;
	}

	if (width > 2048 || height > 2048) {
		DBG(("%s: operation too large for 3D pipe (%d, %d)\n",
		     __FUNCTION__, width, height));
		return TRUE;
	}

	/* If we can sample directly from user-space, do so */
	if (sna->kgem.has_vmap)
		return FALSE;

	/* is the source picture only in cpu memory e.g. a shm pixmap? */
	return picture_is_cpu(source);
}

/* Realign the vertex buffer when the vertex stride changes, so that
 * PRIM3D_INDIRECT_SEQUENTIAL indexing stays consistent. */
static void
gen3_align_vertex(struct sna *sna,
		  struct sna_composite_op *op)
{
	if (op->floats_per_vertex != sna->render_state.gen3.last_floats_per_vertex) {
		DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n",
		     sna->render_state.gen3.last_floats_per_vertex,
		     op->floats_per_vertex,
		     sna->render.vertex_index,
		     (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex));
		sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex;
		sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex;
		sna->render_state.gen3.last_floats_per_vertex = op->floats_per_vertex;
	}
}

/* Resolve the destination picture to a bo (preferring the GPU copy)
 * and record its geometry and damage pointer in the op. */
static Bool
gen3_composite_set_target(struct sna *sna,
			  struct sna_composite_op *op,
			  PicturePtr dst)
{
	struct sna_pixmap *priv;

	op->dst.pixmap = get_drawable_pixmap(dst->pDrawable);
	op->dst.format = dst->format;
	op->dst.width = op->dst.pixmap->drawable.width;
	op->dst.height = op->dst.pixmap->drawable.height;
	priv = sna_pixmap(op->dst.pixmap);

	op->dst.bo = NULL;
	if (priv &&
priv->gpu_bo == NULL) {
		/* No GPU copy yet: render to the CPU bo if one exists. */
		op->dst.bo = priv->cpu_bo;
		op->damage = &priv->cpu_damage;
	}
	if (op->dst.bo == NULL) {
		priv = sna_pixmap_force_to_gpu(op->dst.pixmap);
		if (priv == NULL)
			return FALSE;

		op->dst.bo = priv->gpu_bo;
		if (!priv->gpu_only)
			op->damage = &priv->gpu_damage;
	}

	get_drawable_deltas(dst->pDrawable, op->dst.pixmap,
			    &op->dst.x, &op->dst.y);

	DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d)\n",
	     __FUNCTION__,
	     op->dst.pixmap, (int)op->dst.format,
	     op->dst.width, op->dst.height,
	     op->dst.bo->pitch,
	     op->dst.x, op->dst.y));

	return TRUE;
}

/* Multiply one 8-bit channel (at 'shift') of s and m; used to fold a
 * constant mask into a constant source. */
static inline uint8_t mult(uint32_t s, uint32_t m, int shift)
{
	s = (s >> shift) & 0xff;
	m = (m >> shift) & 0xff;
	return (s * m) >> 8;
}

/* Prepare a Render composite operation for the gen3 3D pipe; fills in
 * 'tmp' with the shaders, emit callbacks and channel state, or returns
 * FALSE to let the caller fall back. */
static Bool
gen3_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x,  int16_t src_y,
		      int16_t mask_x, int16_t mask_y,
		      int16_t dst_x,  int16_t dst_y,
		      int16_t width,  int16_t height,
		      struct sna_composite_op *tmp)
{
	DBG(("%s()\n", __FUNCTION__));

#if NO_COMPOSITE
	return sna_blt_composite(sna, op,
				 src, dst,
				 src_x, src_y,
				 dst_x, dst_y,
				 width, height, tmp);
#endif

	/* Try to use the BLT engine unless it implies a
	 * 3D -> 2D context switch.
	 */
	if (mask == NULL &&
	    try_blt(sna, dst, src, width, height) &&
	    sna_blt_composite(sna,
			      op, src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height,
			      tmp))
		return TRUE;

	if (op >= ARRAY_SIZE(gen3_blend_op)) {
		DBG(("%s: fallback due to unhandled blend op: %d\n",
		     __FUNCTION__, op));
		return FALSE;
	}

	if (!gen3_check_dst_format(dst->format)) {
		DBG(("%s: fallback due to unhandled dst format: %x\n",
		     __FUNCTION__, dst->format));
		return FALSE;
	}

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(sna,
					    op, src, mask, dst,
					    src_x, src_y,
					    mask_x, mask_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	memset(&tmp->u.gen3, 0, sizeof(tmp->u.gen3));

	if (!gen3_composite_set_target(sna, tmp, dst)) {
		DBG(("%s: unable to set render target\n",
		     __FUNCTION__));
		return FALSE;
	}

	tmp->op = op;
	tmp->rb_reversed = gen3_dst_rb_reversed(tmp->dst.format);
	if (tmp->dst.width > 2048 || tmp->dst.height > 2048 ||
	    !gen3_check_pitch_3d(tmp->dst.bo)) {
		if (!sna_render_composite_redirect(sna, tmp,
						   dst_x, dst_y, width, height))
			return FALSE;
	}

	tmp->src.gen3.type = SHADER_TEXTURE;
	tmp->src.is_affine = TRUE;
	DBG(("%s: preparing source\n", __FUNCTION__));
	switch (gen3_composite_picture(sna, src, tmp, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y)) {
	case -1:
		goto cleanup_dst;
	case 0:
		tmp->src.gen3.type = SHADER_ZERO;
		break;
	case 1:
		gen3_composite_channel_convert(&tmp->src);
		break;
	}
	DBG(("%s: source type=%d\n", __FUNCTION__, tmp->src.gen3.type));

	tmp->mask.gen3.type = SHADER_NONE;
	tmp->mask.is_affine = TRUE;
	tmp->need_magic_ca_pass = FALSE;
	tmp->has_component_alpha = FALSE;
	if (mask && tmp->src.gen3.type != SHADER_ZERO) {
		tmp->mask.gen3.type = SHADER_TEXTURE;
		DBG(("%s: preparing mask\n", __FUNCTION__));
		switch (gen3_composite_picture(sna, mask, tmp, &tmp->mask,
					       mask_x, mask_y,
					       width, height,
					       dst_x, dst_y)) {
		case -1:
			goto cleanup_src;
		case 0:
			tmp->mask.gen3.type = SHADER_ZERO;
			break;
		case 1:
			gen3_composite_channel_convert(&tmp->mask);
			break;
		}
		DBG(("%s: mask type=%d\n", __FUNCTION__, tmp->mask.gen3.type));

		/* A zero mask makes the whole composite a no-op source. */
		if (tmp->mask.gen3.type == SHADER_ZERO) {
			if (tmp->src.bo) {
				kgem_bo_destroy(&sna->kgem,
						tmp->src.bo);
				tmp->src.bo = NULL;
			}
			tmp->src.gen3.type = SHADER_ZERO;
			tmp->mask.gen3.type = SHADER_NONE;
		}

		if (tmp->mask.gen3.type != SHADER_NONE &&
		    mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			/* Check if it's component alpha that relies on a source alpha
			 * and on the source value. We can only get one of those
			 * into the single source value that we get to blend with.
			 */
			tmp->has_component_alpha = TRUE;
			if (tmp->mask.gen3.type == SHADER_CONSTANT &&
			    tmp->mask.gen3.mode == 0xffffffff) {
				/* Opaque white mask: drop it entirely. */
				tmp->mask.gen3.type = SHADER_NONE;
				tmp->has_component_alpha = FALSE;
			} else if (tmp->src.gen3.type == SHADER_CONSTANT &&
				   tmp->src.gen3.mode == 0xffffffff) {
				/* Opaque white source: the mask becomes the source. */
				tmp->src = tmp->mask;
				tmp->mask.gen3.type = SHADER_NONE;
				tmp->mask.bo = NULL;
				tmp->has_component_alpha = FALSE;
			} else if (tmp->src.gen3.type == SHADER_CONSTANT &&
				   tmp->mask.gen3.type == SHADER_CONSTANT) {
				/* Two constants: pre-multiply per channel. */
				uint32_t a,r,g,b;

				a = mult(tmp->src.gen3.mode,
					 tmp->mask.gen3.mode,
					 24);
				r = mult(tmp->src.gen3.mode,
					 tmp->mask.gen3.mode,
					 16);
				g = mult(tmp->src.gen3.mode,
					 tmp->mask.gen3.mode,
					 8);
				b = mult(tmp->src.gen3.mode,
					 tmp->mask.gen3.mode,
					 0);

				DBG(("%s: combining constant source/mask: %x x %x -> %x\n",
				     __FUNCTION__,
				     tmp->src.gen3.mode,
				     tmp->mask.gen3.mode,
				     a << 24 | r << 16 | g << 8 | b));

				tmp->src.gen3.mode =
					a << 24 | r << 16 | g << 8 | b;

				tmp->mask.gen3.type = SHADER_NONE;
				tmp->has_component_alpha = FALSE;
			} else if (gen3_blend_op[op].src_alpha &&
				   (gen3_blend_op[op].src_blend != BLENDFACT_ZERO)) {
				/* Needs both src.A and src.C: split PictOpOver
				 * into OutReverse + the magic Add pass. */
				if (op != PictOpOver)
					goto cleanup_mask;

				tmp->need_magic_ca_pass = TRUE;
				tmp->op = PictOpOutReverse;
				sna->render.vertex_start = sna->render.vertex_index;
			}
		}
	}
	DBG(("%s: final src/mask type=%d/%d, affine=%d/%d\n", __FUNCTION__,
	     tmp->src.gen3.type, tmp->mask.gen3.type,
	     tmp->src.is_affine, tmp->mask.is_affine));

	/* Pick the most specialised vertex-emission routine available. */
	tmp->prim_emit = gen3_emit_composite_primitive;
	if (tmp->mask.gen3.type == SHADER_NONE ||
	    tmp->mask.gen3.type == SHADER_CONSTANT) {
		switch (tmp->src.gen3.type) {
		case SHADER_NONE:
		case SHADER_CONSTANT:
			tmp->prim_emit = gen3_emit_composite_primitive_constant;
			break;
		case SHADER_LINEAR:
		case SHADER_RADIAL:
			if (tmp->src.transform == NULL)
				tmp->prim_emit = gen3_emit_composite_primitive_identity_gradient;
			else if (tmp->src.is_affine)
				tmp->prim_emit = gen3_emit_composite_primitive_affine_gradient;
			break;
		case SHADER_TEXTURE:
			if (tmp->src.transform == NULL)
				tmp->prim_emit = gen3_emit_composite_primitive_identity_source;
			else if (tmp->src.is_affine)
				tmp->prim_emit = gen3_emit_composite_primitive_affine_source;
			break;
		}
	} else if (tmp->mask.gen3.type == SHADER_TEXTURE) {
		if (tmp->mask.transform == NULL) {
			if (tmp->src.gen3.type == SHADER_CONSTANT)
				tmp->prim_emit = gen3_emit_composite_primitive_constant_identity_mask;
			else if (tmp->src.transform == NULL)
				tmp->prim_emit = gen3_emit_composite_primitive_identity_source_mask;
			else if (tmp->src.is_affine)
				tmp->prim_emit = gen3_emit_composite_primitive_affine_source_mask;
		}
	}

	/* 2 floats for position, plus 2 (affine) or 3 (projective) per
	 * texcoord-using channel. */
	tmp->floats_per_vertex = 2;
	if (tmp->src.gen3.type != SHADER_CONSTANT &&
	    tmp->src.gen3.type != SHADER_ZERO)
		tmp->floats_per_vertex += tmp->src.is_affine ? 2 : 3;
	if (tmp->mask.gen3.type != SHADER_NONE &&
	    tmp->mask.gen3.type != SHADER_CONSTANT)
		tmp->floats_per_vertex += tmp->mask.is_affine ? 2 : 3;
	DBG(("%s: floats_per_vertex = 2 + %d + %d = %d\n", __FUNCTION__,
	     (tmp->src.gen3.type != SHADER_CONSTANT &&
	      tmp->src.gen3.type != SHADER_ZERO) ?
	     tmp->src.is_affine ? 2 : 3 : 0,
	     (tmp->mask.gen3.type != SHADER_NONE &&
	      tmp->mask.gen3.type != SHADER_CONSTANT) ?
	     tmp->mask.is_affine ?
2 : 3 : 0, + tmp->floats_per_vertex)); + + tmp->blt = gen3_render_composite_blt; + tmp->boxes = gen3_render_composite_boxes; + tmp->done = gen3_render_composite_done; + + if (!kgem_check_bo(&sna->kgem, tmp->dst.bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, tmp->src.bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, tmp->mask.bo)) + kgem_submit(&sna->kgem); + + if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo)) { + if (tmp->src.bo == tmp->dst.bo || tmp->mask.bo == tmp->dst.bo) { + kgem_emit_flush(&sna->kgem); + } else { + OUT_BATCH(_3DSTATE_MODES_5_CMD | + PIPELINE_FLUSH_RENDER_CACHE | + PIPELINE_FLUSH_TEXTURE_CACHE); + kgem_clear_dirty(&sna->kgem); + } + } + + gen3_emit_composite_state(sna, tmp); + gen3_align_vertex(sna, tmp); + + sna->render.op = tmp; + return TRUE; + +cleanup_mask: + if (tmp->mask.bo) + kgem_bo_destroy(&sna->kgem, tmp->mask.bo); +cleanup_src: + if (tmp->src.bo) + kgem_bo_destroy(&sna->kgem, tmp->src.bo); +cleanup_dst: + if (tmp->redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->dst.bo); + return FALSE; +} + +static void +gen3_emit_composite_spans_vertex(struct sna *sna, + const struct sna_composite_spans_op *op, + int16_t x, int16_t y, + float opacity) +{ + gen3_emit_composite_dstcoord(sna, x + op->base.dst.x, y + op->base.dst.y); + gen3_emit_composite_texcoord(sna, &op->base.src, x, y); + OUT_VERTEX(opacity); +} + +static void +gen3_emit_composite_spans_primitive_zero(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 6; + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; + + v[2] = op->base.dst.x + box->x1; + v[3] = v[1]; + + v[4] = v[2]; + v[5] = op->base.dst.x + box->y1; +} + +static void +gen3_emit_composite_spans_primitive_constant(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) 
+{ + float *v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 9; + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; + v[2] = opacity; + + v[3] = op->base.dst.x + box->x1; + v[4] = v[1]; + v[5] = opacity; + + v[6] = v[3]; + v[7] = op->base.dst.y + box->y1; + v[8] = opacity; +} + +static void +gen3_emit_composite_spans_primitive_identity_source(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 15; + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; + v[2] = (op->base.src.offset[0] + box->x2) * op->base.src.scale[0]; + v[3] = (op->base.src.offset[1] + box->y2) * op->base.src.scale[1]; + v[4] = opacity; + + v[5] = op->base.dst.x + box->x1; + v[6] = v[1]; + v[7] = (op->base.src.offset[0] + box->x1) * op->base.src.scale[0]; + v[8] = v[3]; + v[9] = opacity; + + v[10] = v[5]; + v[11] = op->base.dst.y + box->y1; + v[12] = v[7]; + v[13] = (op->base.src.offset[1] + box->y1) * op->base.src.scale[1]; + v[14] = opacity; +} + +static void +gen3_emit_composite_spans_primitive_affine_source(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + PictTransform *transform = op->base.src.transform; + float x, y, *v; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 15; + + v[0] = op->base.dst.x + box->x2; + v[6] = v[1] = op->base.dst.y + box->y2; + v[10] = v[5] = op->base.dst.x + box->x1; + v[11] = op->base.dst.y + box->y1; + v[4] = opacity; + v[9] = opacity; + v[14] = opacity; + + _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2, + (int)op->base.src.offset[1] + box->y2, + transform, + &x, &y); + v[2] = x * op->base.src.scale[0]; + v[3] = y * op->base.src.scale[1]; + + _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1, + 
(int)op->base.src.offset[1] + box->y2, + transform, + &x, &y); + v[7] = x * op->base.src.scale[0]; + v[8] = y * op->base.src.scale[1]; + + _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1, + (int)op->base.src.offset[1] + box->y1, + transform, + &x, &y); + v[12] = x * op->base.src.scale[0]; + v[13] = y * op->base.src.scale[1]; +} + +static void +gen3_emit_composite_spans_primitive_identity_gradient(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + float *v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 15; + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; + v[2] = op->base.src.offset[0] + box->x2; + v[3] = op->base.src.offset[1] + box->y2; + v[4] = opacity; + + v[5] = op->base.dst.x + box->x1; + v[6] = v[1]; + v[7] = op->base.src.offset[0] + box->x1; + v[8] = v[3]; + v[9] = opacity; + + v[10] = v[5]; + v[11] = op->base.dst.y + box->y1; + v[12] = v[7]; + v[13] = op->base.src.offset[1] + box->y1; + v[14] = opacity; +} + +static void +gen3_emit_composite_spans_primitive_affine_gradient(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + PictTransform *transform = op->base.src.transform; + float *v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 15; + + v[0] = op->base.dst.x + box->x2; + v[1] = op->base.dst.y + box->y2; + _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x2, + (int)op->base.src.offset[1] + box->y2, + transform, + &v[2], &v[3]); + v[4] = opacity; + + v[5] = op->base.dst.x + box->x1; + v[6] = v[1]; + _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1, + (int)op->base.src.offset[1] + box->y2, + transform, + &v[7], &v[8]); + v[9] = opacity; + + v[10] = v[5]; + v[11] = op->base.dst.y + box->y1; + _sna_get_transformed_coordinates((int)op->base.src.offset[0] + box->x1, + (int)op->base.src.offset[1] + box->y1, 
+ transform, + &v[12], &v[13]); + v[14] = opacity; +} + +static void +gen3_emit_composite_spans_primitive(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity) +{ + gen3_emit_composite_spans_vertex(sna, op, + box->x2, box->y2, + opacity); + gen3_emit_composite_spans_vertex(sna, op, + box->x1, box->y2, + opacity); + gen3_emit_composite_spans_vertex(sna, op, + box->x1, box->y1, + opacity); +} + +static void +gen3_render_composite_spans_boxes(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, int nbox, + float opacity) +{ + DBG(("%s: nbox=%d, src=+(%d, %d), opacity=%f, dst=+(%d, %d)\n", + __FUNCTION__, nbox, + op->base.src.offset[0], op->base.src.offset[1], + opacity, + op->base.dst.x, op->base.dst.y)); + + do { + int nbox_this_time; + + nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); + if (nbox_this_time == 0) { + gen3_emit_composite_state(sna, &op->base); + nbox_this_time = gen3_get_rectangles(sna, &op->base, nbox); + } + nbox -= nbox_this_time; + + do { + DBG((" %s: (%d, %d) x (%d, %d)\n", __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1)); + + op->prim_emit(sna, op, box++, opacity); + } while (--nbox_this_time); + } while (nbox); +} + +static void +gen3_render_composite_spans_done(struct sna *sna, + const struct sna_composite_spans_op *op) +{ + gen3_vertex_flush(sna); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + + DBG(("%s()\n", __FUNCTION__)); + + sna_render_composite_redirect_done(sna, &op->base); + if (op->base.src.bo) + kgem_bo_destroy(&sna->kgem, op->base.src.bo); +} + +static Bool +gen3_render_composite_spans(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_spans_op *tmp) +{ + DBG(("%s(src=(%d, %d), dst=(%d, %d), size=(%d, %d))\n", __FUNCTION__, + src_x, src_y, dst_x, dst_y, width, height)); + +#if 
NO_COMPOSITE_SPANS + return FALSE; +#endif + + if (op >= ARRAY_SIZE(gen3_blend_op)) { + DBG(("%s: fallback due to unhandled blend op: %d\n", + __FUNCTION__, op)); + return FALSE; + } + + if (!gen3_check_dst_format(dst->format)) { + DBG(("%s: fallback due to unhandled dst format: %x\n", + __FUNCTION__, dst->format)); + return FALSE; + } + + if (need_tiling(sna, width, height)) + return FALSE; + + if (!gen3_composite_set_target(sna, &tmp->base, dst)) { + DBG(("%s: unable to set render target\n", + __FUNCTION__)); + return FALSE; + } + + tmp->base.op = op; + tmp->base.rb_reversed = gen3_dst_rb_reversed(tmp->base.dst.format); + if (tmp->base.dst.width > 2048 || tmp->base.dst.height > 2048 || + !gen3_check_pitch_3d(tmp->base.dst.bo)) { + if (!sna_render_composite_redirect(sna, &tmp->base, + dst_x, dst_y, width, height)) + return FALSE; + } + + tmp->base.src.gen3.type = SHADER_TEXTURE; + tmp->base.src.is_affine = TRUE; + DBG(("%s: preparing source\n", __FUNCTION__)); + switch (gen3_composite_picture(sna, src, &tmp->base, &tmp->base.src, + src_x, src_y, + width, height, + dst_x, dst_y)) { + case -1: + goto cleanup_dst; + case 0: + tmp->base.src.gen3.type = SHADER_ZERO; + break; + case 1: + gen3_composite_channel_convert(&tmp->base.src); + break; + } + DBG(("%s: source type=%d\n", __FUNCTION__, tmp->base.src.gen3.type)); + + if (tmp->base.src.gen3.type != SHADER_ZERO) + tmp->base.mask.gen3.type = SHADER_OPACITY; + + tmp->prim_emit = gen3_emit_composite_spans_primitive; + switch (tmp->base.src.gen3.type) { + case SHADER_NONE: + assert(0); + case SHADER_ZERO: + tmp->prim_emit = gen3_emit_composite_spans_primitive_zero; + break; + case SHADER_CONSTANT: + tmp->prim_emit = gen3_emit_composite_spans_primitive_constant; + break; + case SHADER_LINEAR: + case SHADER_RADIAL: + if (tmp->base.src.transform == NULL) + tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_gradient; + else if (tmp->base.src.is_affine) + tmp->prim_emit = 
gen3_emit_composite_spans_primitive_affine_gradient; + break; + case SHADER_TEXTURE: + if (tmp->base.src.transform == NULL) + tmp->prim_emit = gen3_emit_composite_spans_primitive_identity_source; + else if (tmp->base.src.is_affine) + tmp->prim_emit = gen3_emit_composite_spans_primitive_affine_source; + break; + } + + tmp->base.floats_per_vertex = 2; + if (tmp->base.src.gen3.type != SHADER_CONSTANT && + tmp->base.src.gen3.type != SHADER_ZERO) + tmp->base.floats_per_vertex += tmp->base.src.is_affine ? 2 : 3; + tmp->base.floats_per_vertex += + tmp->base.mask.gen3.type == SHADER_OPACITY; + + tmp->boxes = gen3_render_composite_spans_boxes; + tmp->done = gen3_render_composite_spans_done; + + if (!kgem_check_bo(&sna->kgem, tmp->base.dst.bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, tmp->base.src.bo)) + kgem_submit(&sna->kgem); + + if (kgem_bo_is_dirty(tmp->base.src.bo)) { + if (tmp->base.src.bo == tmp->base.dst.bo) { + kgem_emit_flush(&sna->kgem); + } else { + OUT_BATCH(_3DSTATE_MODES_5_CMD | + PIPELINE_FLUSH_RENDER_CACHE | + PIPELINE_FLUSH_TEXTURE_CACHE); + kgem_clear_dirty(&sna->kgem); + } + } + + gen3_emit_composite_state(sna, &tmp->base); + gen3_align_vertex(sna, &tmp->base); + return TRUE; + +cleanup_dst: + if (tmp->base.redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->base.dst.bo); + return FALSE; +} + +static void +gen3_emit_video_state(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + PixmapPtr pixmap, + struct kgem_bo *dst_bo, + int width, int height) +{ + uint32_t shader_offset; + uint32_t ms3, s5; + + /* draw rect -- just clipping */ + OUT_BATCH(_3DSTATE_DRAW_RECT_CMD); + OUT_BATCH(DRAW_DITHER_OFS_X(pixmap->drawable.x & 3) | + DRAW_DITHER_OFS_Y(pixmap->drawable.y & 3)); + OUT_BATCH(0x00000000); /* ymin, xmin */ + /* ymax, xmax */ + OUT_BATCH((width - 1) | (height - 1) << 16); + OUT_BATCH(0x00000000); /* yorigin, xorigin */ + + OUT_BATCH(_3DSTATE_LOAD_STATE_IMMEDIATE_1 | + I1_LOAD_S(1) | I1_LOAD_S(2) | 
I1_LOAD_S(5) | I1_LOAD_S(6) | + 3); + OUT_BATCH((4 << S1_VERTEX_WIDTH_SHIFT) | (4 << S1_VERTEX_PITCH_SHIFT)); + OUT_BATCH(S2_TEXCOORD_FMT(0, TEXCOORDFMT_2D) | + S2_TEXCOORD_FMT(1, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(2, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(3, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(4, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(5, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(6, TEXCOORDFMT_NOT_PRESENT) | + S2_TEXCOORD_FMT(7, TEXCOORDFMT_NOT_PRESENT)); + s5 = 0x0; + if (pixmap->drawable.depth < 24) + s5 |= S5_COLOR_DITHER_ENABLE; + OUT_BATCH(s5); + OUT_BATCH((2 << S6_DEPTH_TEST_FUNC_SHIFT) | + (2 << S6_CBUF_SRC_BLEND_FACT_SHIFT) | + (1 << S6_CBUF_DST_BLEND_FACT_SHIFT) | + S6_COLOR_WRITE_ENABLE | (2 << S6_TRISTRIP_PV_SHIFT)); + + OUT_BATCH(_3DSTATE_CONST_BLEND_COLOR_CMD); + OUT_BATCH(0x00000000); + + OUT_BATCH(_3DSTATE_DST_BUF_VARS_CMD); + OUT_BATCH(gen3_get_dst_format(sna_format_for_depth(pixmap->drawable.depth))); + + /* front buffer, pitch, offset */ + OUT_BATCH(_3DSTATE_BUF_INFO_CMD); + OUT_BATCH(BUF_3D_ID_COLOR_BACK | + gen3_buf_tiling(dst_bo->tiling) | + dst_bo->pitch); + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER, + 0)); + + if (!is_planar_fourcc(frame->id)) { + OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | 4); + OUT_BATCH(0x0000001); /* constant 0 */ + /* constant 0: brightness/contrast */ + OUT_BATCH_F(video->brightness / 128.0); + OUT_BATCH_F(video->contrast / 255.0); + OUT_BATCH_F(0.0); + OUT_BATCH_F(0.0); + + OUT_BATCH(_3DSTATE_SAMPLER_STATE | 3); + OUT_BATCH(0x00000001); + OUT_BATCH(SS2_COLORSPACE_CONVERSION | + (FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); + OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << + SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_EDGE << + SS3_TCY_ADDR_MODE_SHIFT) | + (0 << SS3_TEXTUREMAP_INDEX_SHIFT) | + SS3_NORMALIZED_COORDS); + OUT_BATCH(0x00000000); + + 
OUT_BATCH(_3DSTATE_MAP_STATE | 3); + OUT_BATCH(0x00000001); /* texture map #1 */ + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + frame->bo, + I915_GEM_DOMAIN_SAMPLER << 16, + frame->YBufOffset)); + + ms3 = MAPSURF_422; + switch (frame->id) { + case FOURCC_YUY2: + ms3 |= MT_422_YCRCB_NORMAL; + break; + case FOURCC_UYVY: + ms3 |= MT_422_YCRCB_SWAPY; + break; + } + ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT; + ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT; + OUT_BATCH(ms3); + OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT); + + shader_offset = sna->kgem.nbatch++; + + gen3_fs_dcl(FS_S0); + gen3_fs_dcl(FS_T0); + gen3_fs_texld(FS_OC, FS_S0, FS_T0); + if (video->brightness != 0) { + gen3_fs_add(FS_OC, + gen3_fs_operand_reg(FS_OC), + gen3_fs_operand(FS_C0, X, X, X, ZERO)); + } + } else { + /* For the planar formats, we set up three samplers -- + * one for each plane, in a Y8 format. Because I + * couldn't get the special PLANAR_TO_PACKED + * shader setup to work, I did the manual pixel shader: + * + * y' = y - .0625 + * u' = u - .5 + * v' = v - .5; + * + * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' + * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' + * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' + * + * register assignment: + * r0 = (y',u',v',0) + * r1 = (y,y,y,y) + * r2 = (u,u,u,u) + * r3 = (v,v,v,v) + * OC = (r,g,b,1) + */ + OUT_BATCH(_3DSTATE_PIXEL_SHADER_CONSTANTS | (22 - 2)); + OUT_BATCH(0x000001f); /* constants 0-4 */ + /* constant 0: normalization offsets */ + OUT_BATCH_F(-0.0625); + OUT_BATCH_F(-0.5); + OUT_BATCH_F(-0.5); + OUT_BATCH_F(0.0); + /* constant 1: r coefficients */ + OUT_BATCH_F(1.1643); + OUT_BATCH_F(0.0); + OUT_BATCH_F(1.5958); + OUT_BATCH_F(0.0); + /* constant 2: g coefficients */ + OUT_BATCH_F(1.1643); + OUT_BATCH_F(-0.39173); + OUT_BATCH_F(-0.81290); + OUT_BATCH_F(0.0); + /* constant 3: b coefficients */ + OUT_BATCH_F(1.1643); + OUT_BATCH_F(2.017); + OUT_BATCH_F(0.0); + OUT_BATCH_F(0.0); + /* constant 4: brightness/contrast */ + 
OUT_BATCH_F(video->brightness / 128.0); + OUT_BATCH_F(video->contrast / 255.0); + OUT_BATCH_F(0.0); + OUT_BATCH_F(0.0); + + OUT_BATCH(_3DSTATE_SAMPLER_STATE | 9); + OUT_BATCH(0x00000007); + /* sampler 0 */ + OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); + OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << + SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_EDGE << + SS3_TCY_ADDR_MODE_SHIFT) | + (0 << SS3_TEXTUREMAP_INDEX_SHIFT) | + SS3_NORMALIZED_COORDS); + OUT_BATCH(0x00000000); + /* sampler 1 */ + OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); + OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << + SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_EDGE << + SS3_TCY_ADDR_MODE_SHIFT) | + (1 << SS3_TEXTUREMAP_INDEX_SHIFT) | + SS3_NORMALIZED_COORDS); + OUT_BATCH(0x00000000); + /* sampler 2 */ + OUT_BATCH((FILTER_LINEAR << SS2_MAG_FILTER_SHIFT) | + (FILTER_LINEAR << SS2_MIN_FILTER_SHIFT)); + OUT_BATCH((TEXCOORDMODE_CLAMP_EDGE << + SS3_TCX_ADDR_MODE_SHIFT) | + (TEXCOORDMODE_CLAMP_EDGE << + SS3_TCY_ADDR_MODE_SHIFT) | + (2 << SS3_TEXTUREMAP_INDEX_SHIFT) | + SS3_NORMALIZED_COORDS); + OUT_BATCH(0x00000000); + + OUT_BATCH(_3DSTATE_MAP_STATE | 9); + OUT_BATCH(0x00000007); + + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + frame->bo, + I915_GEM_DOMAIN_SAMPLER << 16, + frame->YBufOffset)); + + ms3 = MAPSURF_8BIT | MT_8BIT_I8; + ms3 |= (frame->height - 1) << MS3_HEIGHT_SHIFT; + ms3 |= (frame->width - 1) << MS3_WIDTH_SHIFT; + OUT_BATCH(ms3); + /* check to see if Y has special pitch than normal + * double u/v pitch, e.g i915 XvMC hw requires at + * least 1K alignment, so Y pitch might + * be same as U/V's.*/ + if (frame->pitch[1]) + OUT_BATCH(((frame->pitch[1] / 4) - 1) << MS4_PITCH_SHIFT); + else + OUT_BATCH(((frame->pitch[0] * 2 / 4) - 1) << MS4_PITCH_SHIFT); + + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + frame->bo, + I915_GEM_DOMAIN_SAMPLER << 16, + frame->UBufOffset)); + + ms3 = 
MAPSURF_8BIT | MT_8BIT_I8; + ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT; + ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT; + OUT_BATCH(ms3); + OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT); + + OUT_BATCH(kgem_add_reloc(&sna->kgem, sna->kgem.nbatch, + frame->bo, + I915_GEM_DOMAIN_SAMPLER << 16, + frame->VBufOffset)); + + ms3 = MAPSURF_8BIT | MT_8BIT_I8; + ms3 |= (frame->height / 2 - 1) << MS3_HEIGHT_SHIFT; + ms3 |= (frame->width / 2 - 1) << MS3_WIDTH_SHIFT; + OUT_BATCH(ms3); + OUT_BATCH(((frame->pitch[0] / 4) - 1) << MS4_PITCH_SHIFT); + + shader_offset = sna->kgem.nbatch++; + + /* Declare samplers */ + gen3_fs_dcl(FS_S0); /* Y */ + gen3_fs_dcl(FS_S1); /* U */ + gen3_fs_dcl(FS_S2); /* V */ + gen3_fs_dcl(FS_T0); /* normalized coords */ + + /* Load samplers to temporaries. */ + gen3_fs_texld(FS_R1, FS_S0, FS_T0); + gen3_fs_texld(FS_R2, FS_S1, FS_T0); + gen3_fs_texld(FS_R3, FS_S2, FS_T0); + + /* Move the sampled YUV data in R[123] to the first + * 3 channels of R0. + */ + gen3_fs_mov_masked(FS_R0, MASK_X, + gen3_fs_operand_reg(FS_R1)); + gen3_fs_mov_masked(FS_R0, MASK_Y, + gen3_fs_operand_reg(FS_R2)); + gen3_fs_mov_masked(FS_R0, MASK_Z, + gen3_fs_operand_reg(FS_R3)); + + /* Normalize the YUV data */ + gen3_fs_add(FS_R0, gen3_fs_operand_reg(FS_R0), + gen3_fs_operand_reg(FS_C0)); + /* dot-product the YUV data in R0 by the vectors of + * coefficients for calculating R, G, and B, storing + * the results in the R, G, or B channels of the output + * color. The OC results are implicitly clamped + * at the end of the program. + */ + gen3_fs_dp3(FS_OC, MASK_X, + gen3_fs_operand_reg(FS_R0), + gen3_fs_operand_reg(FS_C1)); + gen3_fs_dp3(FS_OC, MASK_Y, + gen3_fs_operand_reg(FS_R0), + gen3_fs_operand_reg(FS_C2)); + gen3_fs_dp3(FS_OC, MASK_Z, + gen3_fs_operand_reg(FS_R0), + gen3_fs_operand_reg(FS_C3)); + /* Set alpha of the output to 1.0, by wiring W to 1 + * and not actually using the source. 
+ */ + gen3_fs_mov_masked(FS_OC, MASK_W, + gen3_fs_operand_one()); + + if (video->brightness != 0) { + gen3_fs_add(FS_OC, + gen3_fs_operand_reg(FS_OC), + gen3_fs_operand(FS_C4, X, X, X, ZERO)); + } + } + + sna->kgem.batch[shader_offset] = + _3DSTATE_PIXEL_SHADER_PROGRAM | + (sna->kgem.nbatch - shader_offset - 2); + + /* video is the last operation in the batch, so state gets reset + * afterwards automatically + * gen3_reset(); + */ +} + +static void +gen3_video_get_batch(struct sna *sna) +{ + if (!kgem_check_batch(&sna->kgem, 120)) { + DBG(("%s: flushing batch: nbatch %d < %d\n", + __FUNCTION__, + batch_space(sna), 120)); + kgem_submit(&sna->kgem); + } + + if (sna->kgem.nreloc + 4 > KGEM_RELOC_SIZE(&sna->kgem)) { + DBG(("%s: flushing batch: reloc %d >= %d\n", + __FUNCTION__, + sna->kgem.nreloc + 4, + (int)KGEM_RELOC_SIZE(&sna->kgem))); + kgem_submit(&sna->kgem); + } + + if (sna->kgem.nexec + 2 > KGEM_EXEC_SIZE(&sna->kgem)) { + DBG(("%s: flushing batch: exec %d >= %d\n", + __FUNCTION__, + sna->kgem.nexec + 2, + (int)KGEM_EXEC_SIZE(&sna->kgem))); + kgem_submit(&sna->kgem); + } + + if (sna->render_state.gen3.need_invariant) + gen3_emit_invariant(sna); +} + +static int +gen3_get_inline_rectangles(struct sna *sna, int want, int floats_per_vertex) +{ + int size = floats_per_vertex * 3; + int rem = batch_space(sna) - 1; + + if (size * want > rem) + want = rem / size; + + return want; +} + +static Bool +gen3_render_video(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + short src_w, short src_h, + short drw_w, short drw_h, + PixmapPtr pixmap) +{ + BoxPtr pbox = REGION_RECTS(dstRegion); + int nbox = REGION_NUM_RECTS(dstRegion); + int dxo = dstRegion->extents.x1; + int dyo = dstRegion->extents.y1; + int width = dstRegion->extents.x2 - dxo; + int height = dstRegion->extents.y2 - dyo; + float src_scale_x, src_scale_y; + int pix_xoff, pix_yoff; + struct kgem_bo *dst_bo; + int copy = 0; + + DBG(("%s: %dx%d -> %dx%d\n", 
__FUNCTION__, src_w, src_h, drw_w, drw_h)); + + if (pixmap->drawable.width > 2048 || + pixmap->drawable.height > 2048 || + !gen3_check_pitch_3d(sna_pixmap_get_bo(pixmap))) { + int bpp = pixmap->drawable.bitsPerPixel; + + dst_bo = kgem_create_2d(&sna->kgem, + width, height, bpp, + kgem_choose_tiling(&sna->kgem, + I915_TILING_X, + width, height, bpp), + 0); + if (!dst_bo) + return FALSE; + + pix_xoff = -dxo; + pix_yoff = -dyo; + copy = 1; + } else { + dst_bo = sna_pixmap_get_bo(pixmap); + + width = pixmap->drawable.width; + height = pixmap->drawable.height; + + /* Set up the offset for translating from the given region + * (in screen coordinates) to the backing pixmap. + */ +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + } + + src_scale_x = ((float)src_w / frame->width) / drw_w; + src_scale_y = ((float)src_h / frame->height) / drw_h; + + DBG(("%s: src offset=(%d, %d), scale=(%f, %f), dst offset=(%d, %d)\n", + __FUNCTION__, + dxo, dyo, src_scale_x, src_scale_y, pix_xoff, pix_yoff)); + + gen3_video_get_batch(sna); + gen3_emit_video_state(sna, video, frame, pixmap, + dst_bo, width, height); + do { + int nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4); + if (nbox_this_time == 0) { + gen3_video_get_batch(sna); + gen3_emit_video_state(sna, video, frame, pixmap, + dst_bo, width, height); + nbox_this_time = gen3_get_inline_rectangles(sna, nbox, 4); + } + nbox -= nbox_this_time; + + OUT_BATCH(PRIM3D_RECTLIST | (12 * nbox_this_time - 1)); + while (nbox_this_time--) { + int box_x1 = pbox->x1; + int box_y1 = pbox->y1; + int box_x2 = pbox->x2; + int box_y2 = pbox->y2; + + pbox++; + + DBG(("%s: box (%d, %d), (%d, %d)\n", + __FUNCTION__, box_x1, box_y1, box_x2, box_y2)); + + /* bottom right */ + OUT_BATCH_F(box_x2 + pix_xoff); + OUT_BATCH_F(box_y2 + pix_yoff); + OUT_BATCH_F((box_x2 - dxo) * src_scale_x); + OUT_BATCH_F((box_y2 - dyo) * src_scale_y); 
+ + /* bottom left */ + OUT_BATCH_F(box_x1 + pix_xoff); + OUT_BATCH_F(box_y2 + pix_yoff); + OUT_BATCH_F((box_x1 - dxo) * src_scale_x); + OUT_BATCH_F((box_y2 - dyo) * src_scale_y); + + /* top left */ + OUT_BATCH_F(box_x1 + pix_xoff); + OUT_BATCH_F(box_y1 + pix_yoff); + OUT_BATCH_F((box_x1 - dxo) * src_scale_x); + OUT_BATCH_F((box_y1 - dyo) * src_scale_y); + } + } while (nbox); + + if (copy) { +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + sna_blt_copy_boxes(sna, GXcopy, + dst_bo, -dxo, -dyo, + sna_pixmap_get_bo(pixmap), pix_xoff, pix_yoff, + pixmap->drawable.bitsPerPixel, + REGION_RECTS(dstRegion), + REGION_NUM_RECTS(dstRegion)); + + kgem_bo_destroy(&sna->kgem, dst_bo); + } + + return TRUE; +} + +static void +gen3_render_copy_setup_source(struct sna *sna, + struct sna_composite_channel *channel, + PixmapPtr pixmap, + struct kgem_bo *bo) +{ + channel->gen3.type = SHADER_TEXTURE; + channel->filter = gen3_filter(PictFilterNearest); + channel->repeat = gen3_texture_repeat(RepeatNone); + channel->width = pixmap->drawable.width; + channel->height = pixmap->drawable.height; + channel->scale[0] = 1./pixmap->drawable.width; + channel->scale[1] = 1./pixmap->drawable.height; + channel->offset[0] = 0; + channel->offset[1] = 0; + gen3_composite_channel_set_format(channel, + sna_format_for_depth(pixmap->drawable.depth)); + channel->bo = bo; + channel->is_affine = 1; +} + +static Bool +gen3_render_copy_boxes(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n) +{ + struct sna_composite_op tmp; + +#if NO_COPY_BOXES + return sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n); +#endif + + DBG(("%s (%d, %d)->(%d, %d) x %d\n", + __FUNCTION__, 
src_dx, src_dy, dst_dx, dst_dy, n)); + + if (sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n)) + return TRUE; + + if (!(alu == GXcopy || alu == GXclear) || + src_bo == dst_bo || /* XXX handle overlap using 3D ? */ + src_bo->pitch > 8192 || + src->drawable.width > 2048 || + src->drawable.height > 2048 || + dst_bo->pitch > 8192 || + dst->drawable.width > 2048 || + dst->drawable.height > 2048) + return sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n); + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, src_bo)) + kgem_submit(&sna->kgem); + + if (kgem_bo_is_dirty(src_bo)) + kgem_emit_flush(&sna->kgem); + + memset(&tmp, 0, sizeof(tmp)); + tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear; + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp.dst.bo = dst_bo; + + gen3_render_copy_setup_source(sna, &tmp.src, src, src_bo); + + tmp.floats_per_vertex = 4; + tmp.mask.gen3.type = SHADER_NONE; + + gen3_emit_composite_state(sna, &tmp); + gen3_align_vertex(sna, &tmp); + + do { + int n_this_time; + + n_this_time = gen3_get_rectangles(sna, &tmp, n); + if (n_this_time == 0) { + gen3_emit_composite_state(sna, &tmp); + n_this_time = gen3_get_rectangles(sna, &tmp, n); + } + n -= n_this_time; + + do { + DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + box->x2 - box->x1, box->y2 - box->y1)); + OUT_VERTEX(box->x2 + dst_dx); + OUT_VERTEX(box->y2 + dst_dy); + OUT_VERTEX((box->x2 + src_dx) * tmp.src.scale[0]); + OUT_VERTEX((box->y2 + src_dy) * tmp.src.scale[1]); + + OUT_VERTEX(box->x1 + dst_dx); + OUT_VERTEX(box->y2 + dst_dy); + OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]); + OUT_VERTEX((box->y2 + src_dy) * 
tmp.src.scale[1]); + + OUT_VERTEX(box->x1 + dst_dx); + OUT_VERTEX(box->y1 + dst_dy); + OUT_VERTEX((box->x1 + src_dx) * tmp.src.scale[0]); + OUT_VERTEX((box->y1 + src_dy) * tmp.src.scale[1]); + + box++; + } while (--n_this_time); + } while (n); + + gen3_vertex_flush(sna); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + return TRUE; +} + +static void +gen3_render_copy_blt(struct sna *sna, + const struct sna_copy_op *op, + int16_t sx, int16_t sy, + int16_t w, int16_t h, + int16_t dx, int16_t dy) +{ + if (!gen3_get_rectangles(sna, &op->base, 1)) { + gen3_emit_composite_state(sna, &op->base); + gen3_get_rectangles(sna, &op->base, 1); + } + + OUT_VERTEX(dx+w); + OUT_VERTEX(dy+h); + OUT_VERTEX((sx+w)*op->base.src.scale[0]); + OUT_VERTEX((sy+h)*op->base.src.scale[1]); + + OUT_VERTEX(dx); + OUT_VERTEX(dy+h); + OUT_VERTEX(sx*op->base.src.scale[0]); + OUT_VERTEX((sy+h)*op->base.src.scale[1]); + + OUT_VERTEX(dx); + OUT_VERTEX(dy); + OUT_VERTEX(sx*op->base.src.scale[0]); + OUT_VERTEX(sy*op->base.src.scale[1]); +} + +static void +gen3_render_copy_done(struct sna *sna, const struct sna_copy_op *op) +{ + gen3_vertex_flush(sna); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); +} + +static Bool +gen3_render_copy(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + struct sna_copy_op *tmp) +{ +#if NO_COPY + return sna_blt_copy(sna, alu, + src_bo, dst_bo, + dst->drawable.bitsPerPixel, + op); +#endif + + /* Prefer to use the BLT */ + if (sna->kgem.mode == KGEM_BLT && + src->drawable.bitsPerPixel == dst->drawable.bitsPerPixel && + sna_blt_copy(sna, alu, + src_bo, dst_bo, + dst->drawable.bitsPerPixel, + tmp)) + return TRUE; + + /* Must use the BLT if we can't RENDER... 
*/ + if (!(alu == GXcopy || alu == GXclear) || + src->drawable.width > 2048 || src->drawable.height > 2048 || + dst->drawable.width > 2048 || dst->drawable.height > 2048 || + src_bo->pitch > 8192 || dst_bo->pitch > 8192) { + if (src->drawable.bitsPerPixel != dst->drawable.bitsPerPixel) + return FALSE; + + return sna_blt_copy(sna, alu, src_bo, dst_bo, + dst->drawable.bitsPerPixel, + tmp); + } + + tmp->base.op = alu == GXcopy ? PictOpSrc : PictOpClear; + + tmp->base.dst.pixmap = dst; + tmp->base.dst.width = dst->drawable.width; + tmp->base.dst.height = dst->drawable.height; + tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp->base.dst.bo = dst_bo; + + gen3_render_copy_setup_source(sna, &tmp->base.src, src, src_bo); + + tmp->base.floats_per_vertex = 4; + tmp->base.mask.gen3.type = SHADER_NONE; + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, src_bo)) + kgem_submit(&sna->kgem); + + tmp->blt = gen3_render_copy_blt; + tmp->done = gen3_render_copy_done; + + gen3_emit_composite_state(sna, &tmp->base); + gen3_align_vertex(sna, &tmp->base); + return TRUE; +} + +static Bool +gen3_render_fill_boxes_try_blt(struct sna *sna, + CARD8 op, PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n) +{ + uint8_t alu = GXcopy; + uint32_t pixel; + + if (!sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + format)) + return FALSE; + + if (op == PictOpClear) { + alu = GXclear; + pixel = 0; + op = PictOpSrc; + } + + if (op == PictOpOver) { + if ((pixel & 0xff000000) == 0xff000000) + op = PictOpSrc; + } + + if (op != PictOpSrc) + return FALSE; + + return sna_blt_fill_boxes(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + pixel, box, n); +} + +static Bool +gen3_render_fill_boxes(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec 
*box, int n) +{ + struct sna_composite_op tmp; + uint32_t pixel; + +#if NO_FILL_BOXES + return gen3_render_fill_boxes_try_blt(sna, op, format, color, + dst, dst_bo, + box, n); +#endif + + DBG(("%s (op=%d, color=(%04x,%04x,%04x, %04x))\n", + __FUNCTION__, op, + color->red, color->green, color->blue, color->alpha)); + + if (op >= ARRAY_SIZE(gen3_blend_op)) { + DBG(("%s: fallback due to unhandled blend op: %d\n", + __FUNCTION__, op)); + return FALSE; + } + + if (dst->drawable.width > 2048 || + dst->drawable.height > 2048 || + dst_bo->pitch > 8192) + return gen3_render_fill_boxes_try_blt(sna, op, format, color, + dst, dst_bo, + box, n); + + if (gen3_render_fill_boxes_try_blt(sna, op, format, color, + dst, dst_bo, + box, n)) + return TRUE; + + if (!sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + PICT_a8r8g8b8)) + return FALSE; + + memset(&tmp, 0, sizeof(tmp)); + tmp.op = op; + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = format; + tmp.dst.bo = dst_bo; + tmp.floats_per_vertex = 2; + + tmp.src.gen3.type = SHADER_CONSTANT; + tmp.src.gen3.mode = pixel; + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + + gen3_emit_composite_state(sna, &tmp); + gen3_align_vertex(sna, &tmp); + + do { + int n_this_time = gen3_get_rectangles(sna, &tmp, n); + if (n_this_time == 0) { + gen3_emit_composite_state(sna, &tmp); + n_this_time = gen3_get_rectangles(sna, &tmp, n); + } + n -= n_this_time; + + do { + DBG((" (%d, %d), (%d, %d)\n", + box->x1, box->y1, box->x2, box->y2)); + OUT_VERTEX(box->x2); + OUT_VERTEX(box->y2); + OUT_VERTEX(box->x1); + OUT_VERTEX(box->y2); + OUT_VERTEX(box->x1); + OUT_VERTEX(box->y1); + box++; + } while (--n_this_time); + } while (n); + + gen3_vertex_flush(sna); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + return TRUE; +} + +static void +gen3_render_fill_blt(struct sna *sna, + const struct sna_fill_op *op, + int16_t x, int16_t 
y, int16_t w, int16_t h) +{ + if (!gen3_get_rectangles(sna, &op->base, 1)) { + gen3_emit_composite_state(sna, &op->base); + gen3_get_rectangles(sna, &op->base, 1); + } + + OUT_VERTEX(x+w); + OUT_VERTEX(y+h); + OUT_VERTEX(x); + OUT_VERTEX(y+h); + OUT_VERTEX(x); + OUT_VERTEX(y); +} + +static void +gen3_render_fill_done(struct sna *sna, const struct sna_fill_op *op) +{ + gen3_vertex_flush(sna); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); +} + +static Bool +gen3_render_fill(struct sna *sna, uint8_t alu, + PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + struct sna_fill_op *tmp) +{ +#if NO_FILL + return sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + op); +#endif + + /* Prefer to use the BLT if already engaged */ + if (sna->kgem.mode == KGEM_BLT && + sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + tmp)) + return TRUE; + + /* Must use the BLT if we can't RENDER... */ + if (!(alu == GXcopy || alu == GXclear) || + dst->drawable.width > 2048 || dst->drawable.height > 2048 || + dst_bo->pitch > 8192) + return sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + tmp); + + if (alu == GXclear) + color = 0; + + tmp->base.op = color == 0 ? 
PictOpClear : PictOpSrc; + tmp->base.dst.pixmap = dst; + tmp->base.dst.width = dst->drawable.width; + tmp->base.dst.height = dst->drawable.height; + tmp->base.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp->base.dst.bo = dst_bo; + tmp->base.floats_per_vertex = 2; + + tmp->base.src.gen3.type = SHADER_CONSTANT; + tmp->base.src.gen3.mode = + sna_rgba_for_color(color, dst->drawable.depth); + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + + tmp->blt = gen3_render_fill_blt; + tmp->done = gen3_render_fill_done; + + gen3_emit_composite_state(sna, &tmp->base); + gen3_align_vertex(sna, &tmp->base); + return TRUE; +} + +static void gen3_render_flush(struct sna *sna) +{ + gen3_vertex_finish(sna, TRUE); +} + +static void +gen3_render_context_switch(struct sna *sna, + int new_mode) +{ +} + +static void +gen3_render_fini(struct sna *sna) +{ +} + +Bool gen3_render_init(struct sna *sna) +{ + struct sna_render *render = &sna->render; + + gen3_render_reset(sna); + + render->composite = gen3_render_composite; + render->composite_spans = gen3_render_composite_spans; + + render->video = gen3_render_video; + + render->copy_boxes = gen3_render_copy_boxes; + render->copy = gen3_render_copy; + + render->fill_boxes = gen3_render_fill_boxes; + render->fill = gen3_render_fill; + + render->reset = gen3_render_reset; + render->flush = gen3_render_flush; + render->context_switch = gen3_render_context_switch; + render->fini = gen3_render_fini; + + render->max_3d_size = 2048; + return TRUE; +} diff --git a/src/sna/gen3_render.h b/src/sna/gen3_render.h new file mode 100644 index 00000000..3272d5cb --- /dev/null +++ b/src/sna/gen3_render.h @@ -0,0 +1,1479 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef _I915_REG_H_ +#define _I915_REG_H_ + +#define CMD_3D (3 << 29) + +#define I915_SET_FIELD( var, mask, value ) (var &= ~(mask), var |= value) + +#define PRIM3D (CMD_3D | (0x1f<<24)) +#define PRIM3D_INDIRECT_SEQUENTIAL ((1<<23) | (0<<17)) +#define PRIM3D_TRILIST (PRIM3D | (0x0<<18)) +#define PRIM3D_TRISTRIP (PRIM3D | (0x1<<18)) +#define PRIM3D_TRISTRIP_RVRSE (PRIM3D | (0x2<<18)) +#define PRIM3D_TRIFAN (PRIM3D | (0x3<<18)) +#define PRIM3D_POLY (PRIM3D | (0x4<<18)) +#define PRIM3D_LINELIST (PRIM3D | (0x5<<18)) +#define PRIM3D_LINESTRIP (PRIM3D | (0x6<<18)) +#define PRIM3D_RECTLIST (PRIM3D | (0x7<<18)) +#define PRIM3D_POINTLIST (PRIM3D | (0x8<<18)) +#define PRIM3D_DIB (PRIM3D | (0x9<<18)) +#define PRIM3D_CLEAR_RECT (PRIM3D | (0xa<<18)) +#define PRIM3D_ZONE_INIT (PRIM3D | (0xd<<18)) +#define PRIM3D_MASK (0x1f<<18) + + +/* p137 */ +#define _3DSTATE_AA_CMD (CMD_3D | (0x06<<24)) +#define AA_LINE_ECAAR_WIDTH_ENABLE (1<<16) +#define AA_LINE_ECAAR_WIDTH_0_5 0 +#define AA_LINE_ECAAR_WIDTH_1_0 (1<<14) +#define AA_LINE_ECAAR_WIDTH_2_0 (2<<14) +#define AA_LINE_ECAAR_WIDTH_4_0 (3<<14) +#define AA_LINE_REGION_WIDTH_ENABLE (1<<8) +#define AA_LINE_REGION_WIDTH_0_5 0 +#define AA_LINE_REGION_WIDTH_1_0 (1<<6) +#define AA_LINE_REGION_WIDTH_2_0 (2<<6) +#define AA_LINE_REGION_WIDTH_4_0 (3<<6) + +/* 3DSTATE_BACKFACE_STENCIL_OPS, p138*/ +#define _3DSTATE_BACKFACE_STENCIL_OPS (CMD_3D | (0x8<<24)) +#define BFO_ENABLE_STENCIL_REF (1<<23) +#define BFO_STENCIL_REF_SHIFT 15 +#define BFO_STENCIL_REF_MASK (0xff<<15) +#define BFO_ENABLE_STENCIL_FUNCS (1<<14) +#define BFO_STENCIL_TEST_SHIFT 11 +#define BFO_STENCIL_TEST_MASK (0x7<<11) +#define BFO_STENCIL_FAIL_SHIFT 8 +#define BFO_STENCIL_FAIL_MASK (0x7<<8) +#define BFO_STENCIL_PASS_Z_FAIL_SHIFT 5 +#define BFO_STENCIL_PASS_Z_FAIL_MASK (0x7<<5) +#define BFO_STENCIL_PASS_Z_PASS_SHIFT 2 +#define BFO_STENCIL_PASS_Z_PASS_MASK (0x7<<2) +#define 
BFO_ENABLE_STENCIL_TWO_SIDE (1<<1) +#define BFO_STENCIL_TWO_SIDE (1<<0) + +/* 3DSTATE_BACKFACE_STENCIL_MASKS, p140 */ +#define _3DSTATE_BACKFACE_STENCIL_MASKS (CMD_3D | (0x9<<24)) +#define BFM_ENABLE_STENCIL_TEST_MASK (1<<17) +#define BFM_ENABLE_STENCIL_WRITE_MASK (1<<16) +#define BFM_STENCIL_TEST_MASK_SHIFT 8 +#define BFM_STENCIL_TEST_MASK_MASK (0xff<<8) +#define BFM_STENCIL_WRITE_MASK_SHIFT 0 +#define BFM_STENCIL_WRITE_MASK_MASK (0xff<<0) + +/* 3DSTATE_BIN_CONTROL p141 */ + +/* p143 */ +#define _3DSTATE_BUF_INFO_CMD (CMD_3D | (0x1d<<24) | (0x8e<<16) | 1) +/* Dword 1 */ +#define BUF_3D_ID_COLOR_BACK (0x3<<24) +#define BUF_3D_ID_DEPTH (0x7<<24) +#define BUF_3D_USE_FENCE (1<<23) +#define BUF_3D_TILED_SURFACE (1<<22) +#define BUF_3D_TILE_WALK_X 0 +#define BUF_3D_TILE_WALK_Y (1<<21) +/* Dword 2 */ +#define BUF_3D_ADDR(x) ((x) & ~0x3) + +/* 3DSTATE_CHROMA_KEY */ + +/* 3DSTATE_CLEAR_PARAMETERS, p150 */ +#define _3DSTATE_CLEAR_PARAMETERS (CMD_3D | (0x1d<<24) | (0x9c<<16) | 5) +/* Dword 1 */ +#define CLEARPARAM_CLEAR_RECT (1 << 16) +#define CLEARPARAM_ZONE_INIT (0 << 16) +#define CLEARPARAM_WRITE_COLOR (1 << 2) +#define CLEARPARAM_WRITE_DEPTH (1 << 1) +#define CLEARPARAM_WRITE_STENCIL (1 << 0) + +/* 3DSTATE_CONSTANT_BLEND_COLOR, p153 */ +#define _3DSTATE_CONST_BLEND_COLOR_CMD (CMD_3D | (0x1d<<24) | (0x88<<16)) + +/* 3DSTATE_COORD_SET_BINDINGS, p154 */ +#define _3DSTATE_COORD_SET_BINDINGS (CMD_3D | (0x16<<24)) +#define CSB_TCB(iunit, eunit) ((eunit)<<(iunit*3)) + +/* p156 */ +#define _3DSTATE_DFLT_DIFFUSE_CMD (CMD_3D | (0x1d<<24) | (0x99<<16)) + +/* p157 */ +#define _3DSTATE_DFLT_SPEC_CMD (CMD_3D | (0x1d<<24) | (0x9a<<16)) + +/* p158 */ +#define _3DSTATE_DFLT_Z_CMD (CMD_3D | (0x1d<<24) | (0x98<<16)) + +/* 3DSTATE_DEPTH_OFFSET_SCALE, p159 */ +#define _3DSTATE_DEPTH_OFFSET_SCALE (CMD_3D | (0x1d<<24) | (0x97<<16)) +/* scale in dword 1 */ + +/* The depth subrectangle is not supported, but must be disabled. 
*/ +/* 3DSTATE_DEPTH_SUBRECT_DISABLE, p160 */ +#define _3DSTATE_DEPTH_SUBRECT_DISABLE (CMD_3D | (0x1c<<24) | (0x11<<19) | (1 << 1) | (0 << 0)) + +/* p161 */ +#define _3DSTATE_DST_BUF_VARS_CMD (CMD_3D | (0x1d<<24) | (0x85<<16)) +/* Dword 1 */ +#define TEX_DEFAULT_COLOR_OGL (0<<30) +#define TEX_DEFAULT_COLOR_D3D (1<<30) +#define ZR_EARLY_DEPTH (1<<29) +#define LOD_PRECLAMP_OGL (1<<28) +#define LOD_PRECLAMP_D3D (0<<28) +#define DITHER_FULL_ALWAYS (0<<26) +#define DITHER_FULL_ON_FB_BLEND (1<<26) +#define DITHER_CLAMPED_ALWAYS (2<<26) +#define LINEAR_GAMMA_BLEND_32BPP (1<<25) +#define DEBUG_DISABLE_ENH_DITHER (1<<24) +#define DSTORG_HORT_BIAS(x) ((x)<<20) +#define DSTORG_VERT_BIAS(x) ((x)<<16) +#define COLOR_4_2_2_CHNL_WRT_ALL 0 +#define COLOR_4_2_2_CHNL_WRT_Y (1<<12) +#define COLOR_4_2_2_CHNL_WRT_CR (2<<12) +#define COLOR_4_2_2_CHNL_WRT_CB (3<<12) +#define COLOR_4_2_2_CHNL_WRT_CRCB (4<<12) +#define COLR_BUF_8BIT 0 +#define COLR_BUF_RGB555 (1<<8) +#define COLR_BUF_RGB565 (2<<8) +#define COLR_BUF_ARGB8888 (3<<8) +#define COLR_BUF_ARGB4444 (8<<8) +#define COLR_BUF_ARGB1555 (9<<8) +#define COLR_BUF_ARGB2AAA (0xa<<8) +#define DEPTH_IS_Z 0 +#define DEPTH_IS_W (1<<6) +#define DEPTH_FRMT_16_FIXED 0 +#define DEPTH_FRMT_16_FLOAT (1<<2) +#define DEPTH_FRMT_24_FIXED_8_OTHER (2<<2) +#define DEPTH_FRMT_24_FLOAT_8_OTHER (3<<2) +#define VERT_LINE_STRIDE_1 (1<<1) +#define VERT_LINE_STRIDE_0 0 +#define VERT_LINE_STRIDE_OFS_1 1 +#define VERT_LINE_STRIDE_OFS_0 0 + +/* p166 */ +#define _3DSTATE_DRAW_RECT_CMD (CMD_3D|(0x1d<<24)|(0x80<<16)|3) +/* Dword 1 */ +#define DRAW_RECT_DIS_DEPTH_OFS (1<<30) +#define DRAW_DITHER_OFS_X(x) ((x)<<26) +#define DRAW_DITHER_OFS_Y(x) ((x)<<24) +/* Dword 2 */ +#define DRAW_YMIN(x) ((uint16_t)(x)<<16) +#define DRAW_XMIN(x) ((uint16_t)(x)) +/* Dword 3 */ +#define DRAW_YMAX(x) ((uint16_t)(x)<<16) +#define DRAW_XMAX(x) ((uint16_t)(x)) +/* Dword 4 */ +#define DRAW_YORG(x) ((uint16_t)(x)<<16) +#define DRAW_XORG(x) ((uint16_t)(x)) + +/* 
3DSTATE_FILTER_COEFFICIENTS_4X4, p170 */ + +/* 3DSTATE_FILTER_COEFFICIENTS_6X5, p172 */ + +/* _3DSTATE_FOG_COLOR, p173 */ +#define _3DSTATE_FOG_COLOR_CMD (CMD_3D|(0x15<<24)) +#define FOG_COLOR_RED(x) ((x)<<16) +#define FOG_COLOR_GREEN(x) ((x)<<8) +#define FOG_COLOR_BLUE(x) (x) + +/* _3DSTATE_FOG_MODE, p174 */ +#define _3DSTATE_FOG_MODE_CMD (CMD_3D|(0x1d<<24)|(0x89<<16)|2) +/* Dword 1 */ +#define FMC1_FOGFUNC_MODIFY_ENABLE (1<<31) +#define FMC1_FOGFUNC_VERTEX (0<<28) +#define FMC1_FOGFUNC_PIXEL_EXP (1<<28) +#define FMC1_FOGFUNC_PIXEL_EXP2 (2<<28) +#define FMC1_FOGFUNC_PIXEL_LINEAR (3<<28) +#define FMC1_FOGFUNC_MASK (3<<28) +#define FMC1_FOGINDEX_MODIFY_ENABLE (1<<27) +#define FMC1_FOGINDEX_Z (0<<25) +#define FMC1_FOGINDEX_W (1<<25) +#define FMC1_C1_C2_MODIFY_ENABLE (1<<24) +#define FMC1_DENSITY_MODIFY_ENABLE (1<<23) +#define FMC1_C1_ONE (1<<13) +#define FMC1_C1_MASK (0xffff<<4) +/* Dword 2 */ +#define FMC2_C2_ONE (1<<16) +/* Dword 3 */ +#define FMC3_D_ONE (1<<16) + +/* _3DSTATE_INDEPENDENT_ALPHA_BLEND, p177 */ +#define _3DSTATE_INDEPENDENT_ALPHA_BLEND_CMD (CMD_3D|(0x0b<<24)) +#define IAB_MODIFY_ENABLE (1<<23) +#define IAB_ENABLE (1<<22) +#define IAB_MODIFY_FUNC (1<<21) +#define IAB_FUNC_SHIFT 16 +#define IAB_MODIFY_SRC_FACTOR (1<<11) +#define IAB_SRC_FACTOR_SHIFT 6 +#define IAB_SRC_FACTOR_MASK (BLENDFACT_MASK<<6) +#define IAB_MODIFY_DST_FACTOR (1<<5) +#define IAB_DST_FACTOR_SHIFT 0 +#define IAB_DST_FACTOR_MASK (BLENDFACT_MASK<<0) + +#define BLENDFACT_ZERO 0x01 +#define BLENDFACT_ONE 0x02 +#define BLENDFACT_SRC_COLR 0x03 +#define BLENDFACT_INV_SRC_COLR 0x04 +#define BLENDFACT_SRC_ALPHA 0x05 +#define BLENDFACT_INV_SRC_ALPHA 0x06 +#define BLENDFACT_DST_ALPHA 0x07 +#define BLENDFACT_INV_DST_ALPHA 0x08 +#define BLENDFACT_DST_COLR 0x09 +#define BLENDFACT_INV_DST_COLR 0x0a +#define BLENDFACT_SRC_ALPHA_SATURATE 0x0b +#define BLENDFACT_CONST_COLOR 0x0c +#define BLENDFACT_INV_CONST_COLOR 0x0d +#define BLENDFACT_CONST_ALPHA 0x0e +#define BLENDFACT_INV_CONST_ALPHA 0x0f +#define 
BLENDFACT_MASK 0x0f + +#define BLENDFUNC_ADD 0x0 +#define BLENDFUNC_SUBTRACT 0x1 +#define BLENDFUNC_REVERSE_SUBTRACT 0x2 +#define BLENDFUNC_MIN 0x3 +#define BLENDFUNC_MAX 0x4 +#define BLENDFUNC_MASK 0x7 + +/* 3DSTATE_LOAD_INDIRECT, p180 */ + +#define _3DSTATE_LOAD_INDIRECT (CMD_3D|(0x1d<<24)|(0x7<<16)) +#define LI0_STATE_STATIC_INDIRECT (0x01<<8) +#define LI0_STATE_DYNAMIC_INDIRECT (0x02<<8) +#define LI0_STATE_SAMPLER (0x04<<8) +#define LI0_STATE_MAP (0x08<<8) +#define LI0_STATE_PROGRAM (0x10<<8) +#define LI0_STATE_CONSTANTS (0x20<<8) + +#define SIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SIS0_FORCE_LOAD (1<<1) +#define SIS0_BUFFER_VALID (1<<0) +#define SIS1_BUFFER_LENGTH(x) ((x)&0xff) + +#define DIS0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define DIS0_BUFFER_RESET (1<<1) +#define DIS0_BUFFER_VALID (1<<0) + +#define SSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define SSB0_FORCE_LOAD (1<<1) +#define SSB0_BUFFER_VALID (1<<0) +#define SSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define MSB0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define MSB0_FORCE_LOAD (1<<1) +#define MSB0_BUFFER_VALID (1<<0) +#define MSB1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSP0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSP0_FORCE_LOAD (1<<1) +#define PSP0_BUFFER_VALID (1<<0) +#define PSP1_BUFFER_LENGTH(x) ((x)&0xff) + +#define PSC0_BUFFER_ADDRESS(x) ((x)&~0x3) +#define PSC0_FORCE_LOAD (1<<1) +#define PSC0_BUFFER_VALID (1<<0) +#define PSC1_BUFFER_LENGTH(x) ((x)&0xff) + +/* _3DSTATE_RASTERIZATION_RULES */ +#define _3DSTATE_RASTER_RULES_CMD (CMD_3D|(0x07<<24)) +#define ENABLE_POINT_RASTER_RULE (1<<15) +#define OGL_POINT_RASTER_RULE (1<<13) +#define ENABLE_TEXKILL_3D_4D (1<<10) +#define TEXKILL_3D (0<<9) +#define TEXKILL_4D (1<<9) +#define ENABLE_LINE_STRIP_PROVOKE_VRTX (1<<8) +#define ENABLE_TRI_FAN_PROVOKE_VRTX (1<<5) +#define LINE_STRIP_PROVOKE_VRTX(x) ((x)<<6) +#define TRI_FAN_PROVOKE_VRTX(x) ((x)<<3) + +/* _3DSTATE_SCISSOR_ENABLE, p256 */ +#define _3DSTATE_SCISSOR_ENABLE_CMD (CMD_3D|(0x1c<<24)|(0x10<<19)) +#define ENABLE_SCISSOR_RECT 
((1<<1) | 1) +#define DISABLE_SCISSOR_RECT (1<<1) + +/* _3DSTATE_SCISSOR_RECTANGLE_0, p257 */ +#define _3DSTATE_SCISSOR_RECT_0_CMD (CMD_3D|(0x1d<<24)|(0x81<<16)|1) +/* Dword 1 */ +#define SCISSOR_RECT_0_YMIN(x) ((x)<<16) +#define SCISSOR_RECT_0_XMIN(x) (x) +/* Dword 2 */ +#define SCISSOR_RECT_0_YMAX(x) ((x)<<16) +#define SCISSOR_RECT_0_XMAX(x) (x) + +/* p189 */ +#define _3DSTATE_LOAD_STATE_IMMEDIATE_1 ((0x3<<29)|(0x1d<<24)|(0x04<<16)) +#define I1_LOAD_S(n) (1<<(4+n)) + +#define S0_VB_OFFSET_MASK 0xffffffc +#define S0_AUTO_CACHE_INV_DISABLE (1<<0) + +#define S1_VERTEX_WIDTH_SHIFT 24 +#define S1_VERTEX_WIDTH_MASK (0x3f<<24) +#define S1_VERTEX_PITCH_SHIFT 16 +#define S1_VERTEX_PITCH_MASK (0x3f<<16) + +#define TEXCOORDFMT_2D 0x0 +#define TEXCOORDFMT_3D 0x1 +#define TEXCOORDFMT_4D 0x2 +#define TEXCOORDFMT_1D 0x3 +#define TEXCOORDFMT_2D_16 0x4 +#define TEXCOORDFMT_4D_16 0x5 +#define TEXCOORDFMT_NOT_PRESENT 0xf +#define S2_TEXCOORD_FMT0_MASK 0xf +#define S2_TEXCOORD_FMT1_SHIFT 4 +#define S2_TEXCOORD_FMT(unit, type) ((type)<<(unit*4)) +#define S2_TEXCOORD_NONE (~0) + +#define TEXCOORD_WRAP_SHORTEST_TCX 8 +#define TEXCOORD_WRAP_SHORTEST_TCY 4 +#define TEXCOORD_WRAP_SHORTEST_TCZ 2 +#define TEXCOORD_PERSPECTIVE_DISABLE 1 + +#define S3_WRAP_SHORTEST_TCX(unit) (TEXCOORD_WRAP_SHORTEST_TCX << ((unit) * 4)) +#define S3_WRAP_SHORTEST_TCY(unit) (TEXCOORD_WRAP_SHORTEST_TCY << ((unit) * 4)) +#define S3_WRAP_SHORTEST_TCZ(unit) (TEXCOORD_WRAP_SHORTEST_TCZ << ((unit) * 4)) +#define S3_PERSPECTIVE_DISABLE(unit) (TEXCOORD_PERSPECTIVE_DISABLE << ((unit) * 4)) + +/* S3 not interesting */ + +#define S4_POINT_WIDTH_SHIFT 23 +#define S4_POINT_WIDTH_MASK (0x1ff<<23) +#define S4_LINE_WIDTH_SHIFT 19 +#define S4_LINE_WIDTH_ONE (0x2<<19) +#define S4_LINE_WIDTH_MASK (0xf<<19) +#define S4_FLATSHADE_ALPHA (1<<18) +#define S4_FLATSHADE_FOG (1<<17) +#define S4_FLATSHADE_SPECULAR (1<<16) +#define S4_FLATSHADE_COLOR (1<<15) +#define S4_CULLMODE_BOTH (0<<13) +#define S4_CULLMODE_NONE (1<<13) +#define 
S4_CULLMODE_CW (2<<13) +#define S4_CULLMODE_CCW (3<<13) +#define S4_CULLMODE_MASK (3<<13) +#define S4_VFMT_POINT_WIDTH (1<<12) +#define S4_VFMT_SPEC_FOG (1<<11) +#define S4_VFMT_COLOR (1<<10) +#define S4_VFMT_DEPTH_OFFSET (1<<9) +#define S4_VFMT_XYZ (1<<6) +#define S4_VFMT_XYZW (2<<6) +#define S4_VFMT_XY (3<<6) +#define S4_VFMT_XYW (4<<6) +#define S4_VFMT_XYZW_MASK (7<<6) +#define S4_FORCE_DEFAULT_DIFFUSE (1<<5) +#define S4_FORCE_DEFAULT_SPECULAR (1<<4) +#define S4_LOCAL_DEPTH_OFFSET_ENABLE (1<<3) +#define S4_VFMT_FOG_PARAM (1<<2) +#define S4_SPRITE_POINT_ENABLE (1<<1) +#define S4_LINE_ANTIALIAS_ENABLE (1<<0) + +#define S4_VFMT_MASK (S4_VFMT_POINT_WIDTH | \ + S4_VFMT_SPEC_FOG | \ + S4_VFMT_COLOR | \ + S4_VFMT_DEPTH_OFFSET | \ + S4_VFMT_XYZW_MASK | \ + S4_VFMT_FOG_PARAM) + +#define S5_WRITEDISABLE_ALPHA (1<<31) +#define S5_WRITEDISABLE_RED (1<<30) +#define S5_WRITEDISABLE_GREEN (1<<29) +#define S5_WRITEDISABLE_BLUE (1<<28) +#define S5_WRITEDISABLE_MASK (0xf<<28) +#define S5_FORCE_DEFAULT_POINT_SIZE (1<<27) +#define S5_LAST_PIXEL_ENABLE (1<<26) +#define S5_GLOBAL_DEPTH_OFFSET_ENABLE (1<<25) +#define S5_FOG_ENABLE (1<<24) +#define S5_STENCIL_REF_SHIFT 16 +#define S5_STENCIL_REF_MASK (0xff<<16) +#define S5_STENCIL_TEST_FUNC_SHIFT 13 +#define S5_STENCIL_TEST_FUNC_MASK (0x7<<13) +#define S5_STENCIL_FAIL_SHIFT 10 +#define S5_STENCIL_FAIL_MASK (0x7<<10) +#define S5_STENCIL_PASS_Z_FAIL_SHIFT 7 +#define S5_STENCIL_PASS_Z_FAIL_MASK (0x7<<7) +#define S5_STENCIL_PASS_Z_PASS_SHIFT 4 +#define S5_STENCIL_PASS_Z_PASS_MASK (0x7<<4) +#define S5_STENCIL_WRITE_ENABLE (1<<3) +#define S5_STENCIL_TEST_ENABLE (1<<2) +#define S5_COLOR_DITHER_ENABLE (1<<1) +#define S5_LOGICOP_ENABLE (1<<0) + +#define S6_ALPHA_TEST_ENABLE (1<<31) +#define S6_ALPHA_TEST_FUNC_SHIFT 28 +#define S6_ALPHA_TEST_FUNC_MASK (0x7<<28) +#define S6_ALPHA_REF_SHIFT 20 +#define S6_ALPHA_REF_MASK (0xff<<20) +#define S6_DEPTH_TEST_ENABLE (1<<19) +#define S6_DEPTH_TEST_FUNC_SHIFT 16 +#define S6_DEPTH_TEST_FUNC_MASK (0x7<<16) 
+#define S6_CBUF_BLEND_ENABLE (1<<15) +#define S6_CBUF_BLEND_FUNC_SHIFT 12 +#define S6_CBUF_BLEND_FUNC_MASK (0x7<<12) +#define S6_CBUF_SRC_BLEND_FACT_SHIFT 8 +#define S6_CBUF_SRC_BLEND_FACT_MASK (0xf<<8) +#define S6_CBUF_DST_BLEND_FACT_SHIFT 4 +#define S6_CBUF_DST_BLEND_FACT_MASK (0xf<<4) +#define S6_DEPTH_WRITE_ENABLE (1<<3) +#define S6_COLOR_WRITE_ENABLE (1<<2) +#define S6_TRISTRIP_PV_SHIFT 0 +#define S6_TRISTRIP_PV_MASK (0x3<<0) + +#define S7_DEPTH_OFFSET_CONST_MASK ~0 + +/* 3DSTATE_MAP_DEINTERLACER_PARAMETERS */ +/* 3DSTATE_MAP_PALETTE_LOAD_32, p206 */ + +/* _3DSTATE_MODES_4, p218 */ +#define _3DSTATE_MODES_4_CMD (CMD_3D|(0x0d<<24)) +#define ENABLE_LOGIC_OP_FUNC (1<<23) +#define LOGIC_OP_FUNC(x) ((x)<<18) +#define LOGICOP_MASK (0xf<<18) +#define LOGICOP_COPY 0xc +#define MODE4_ENABLE_STENCIL_TEST_MASK ((1<<17)|(0xff00)) +#define ENABLE_STENCIL_TEST_MASK (1<<17) +#define STENCIL_TEST_MASK(x) ((x)<<8) +#define MODE4_ENABLE_STENCIL_WRITE_MASK ((1<<16)|(0x00ff)) +#define ENABLE_STENCIL_WRITE_MASK (1<<16) +#define STENCIL_WRITE_MASK(x) ((x)&0xff) + +/* _3DSTATE_MODES_5, p220 */ +#define _3DSTATE_MODES_5_CMD (CMD_3D|(0x0c<<24)) +#define PIPELINE_FLUSH_RENDER_CACHE (1<<18) +#define PIPELINE_FLUSH_TEXTURE_CACHE (1<<16) + +/* p221 */ +#define _3DSTATE_PIXEL_SHADER_CONSTANTS (CMD_3D|(0x1d<<24)|(0x6<<16)) +#define PS1_REG(n) (1<<(n)) +#define PS2_CONST_X(n) (n) +#define PS3_CONST_Y(n) (n) +#define PS4_CONST_Z(n) (n) +#define PS5_CONST_W(n) (n) + +/* p222 */ + +#define I915_MAX_TEX_INDIRECT 4 +#define I915_MAX_TEX_INSN 32 +#define I915_MAX_ALU_INSN 64 +#define I915_MAX_DECL_INSN 27 +#define I915_MAX_TEMPORARY 16 + +/* Each instruction is 3 dwords long, though most don't require all + * this space. Maximum of 123 instructions. Smaller maxes per insn + * type. + */ +#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16)) + +#define REG_TYPE_R 0 /* temporary regs, no need to + * dcl, must be written before + * read -- Preserved between + * phases. 
+ */ +#define REG_TYPE_T 1 /* Interpolated values, must be + * dcl'ed before use. + * + * 0..7: texture coord, + * 8: diffuse spec, + * 9: specular color, + * 10: fog parameter in w. + */ +#define REG_TYPE_CONST 2 /* Restriction: only one const + * can be referenced per + * instruction, though it may be + * selected for multiple inputs. + * Constants not initialized + * default to zero. + */ +#define REG_TYPE_S 3 /* sampler */ +#define REG_TYPE_OC 4 /* output color (rgba) */ +#define REG_TYPE_OD 5 /* output depth (w), xyz are + * temporaries. If not written, + * interpolated depth is used? + */ +#define REG_TYPE_U 6 /* unpreserved temporaries */ +#define REG_TYPE_MASK 0x7 +#define REG_NR_MASK 0xf + +/* REG_TYPE_T: + */ +#define T_TEX0 0 +#define T_TEX1 1 +#define T_TEX2 2 +#define T_TEX3 3 +#define T_TEX4 4 +#define T_TEX5 5 +#define T_TEX6 6 +#define T_TEX7 7 +#define T_DIFFUSE 8 +#define T_SPECULAR 9 +#define T_FOG_W 10 /* interpolated fog is in W coord */ + +/* Arithmetic instructions */ + +/* .replicate_swizzle == selection and replication of a particular + * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww + */ +#define A0_NOP (0x0<<24) /* no operation */ +#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ +#define A0_MOV (0x2<<24) /* dst = src0 */ +#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ +#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ +#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define A0_RSQ (0xa<<24) /* dst.xyzw = 1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP (0xd<<24) /* dst = 
(src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */ +#define A0_DEST_SATURATE (1<<22) +#define A0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X (1<<10) +#define A0_DEST_CHANNEL_Y (2<<10) +#define A0_DEST_CHANNEL_Z (4<<10) +#define A0_DEST_CHANNEL_W (8<<10) +#define A0_DEST_CHANNEL_ALL (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT 10 +#define A0_SRC0_TYPE_SHIFT 7 +#define A0_SRC0_NR_SHIFT 2 + +#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + +#define SRC_X 0 +#define SRC_Y 1 +#define SRC_Z 2 +#define SRC_W 3 +#define SRC_ZERO 4 +#define SRC_ONE 5 + +#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT 28 +#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT 24 +#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT 20 +#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT 16 +#define A1_SRC1_TYPE_SHIFT 13 +#define A1_SRC1_NR_SHIFT 8 +#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT 4 +#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT 0 + +#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT 28 +#define A2_SRC1_CHANNEL_W_NEGATE (1<<27) +#define A2_SRC1_CHANNEL_W_SHIFT 24 +#define A2_SRC2_TYPE_SHIFT 21 +#define A2_SRC2_NR_SHIFT 16 +#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT 12 +#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) +#define 
A2_SRC2_CHANNEL_Y_SHIFT 8 +#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT 4 +#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT 0 + +/* Texture instructions */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ +#define T0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback) + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction). + */ +#define T0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT 17 +#define T2_MBZ 0 + +/* Declaration instructions */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. 
*/ +#define D0_SAMPLE_TYPE_SHIFT 22 +#define D0_SAMPLE_TYPE_2D (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) +#define D0_SAMPLE_TYPE_MASK (0x3<<22) + +#define D0_TYPE_SHIFT 19 +/* Allow: T, S */ +#define D0_NR_SHIFT 14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X (1<<10) +#define D0_CHANNEL_Y (2<<10) +#define D0_CHANNEL_Z (4<<10) +#define D0_CHANNEL_W (8<<10) +#define D0_CHANNEL_ALL (0xf<<10) +#define D0_CHANNEL_NONE (0<<10) + +#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations. + * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ 0 +#define D2_MBZ 0 + +/* p207. + * The DWORD count is 3 times the number of bits set in MS1_MAPMASK_MASK + */ +#define _3DSTATE_MAP_STATE (CMD_3D|(0x1d<<24)|(0x0<<16)) + +#define MS1_MAPMASK_SHIFT 0 +#define MS1_MAPMASK_MASK (0x8fff<<0) + +#define MS2_UNTRUSTED_SURFACE (1<<31) +#define MS2_ADDRESS_MASK 0xfffffffc +#define MS2_VERTICAL_LINE_STRIDE (1<<1) +#define MS2_VERTICAL_OFFSET (1<<1) + +#define MS3_HEIGHT_SHIFT 21 +#define MS3_WIDTH_SHIFT 10 +#define MS3_PALETTE_SELECT (1<<9) +#define MS3_MAPSURF_FORMAT_SHIFT 7 +#define MS3_MAPSURF_FORMAT_MASK (0x7<<7) +#define MAPSURF_8BIT (1<<7) +#define MAPSURF_16BIT (2<<7) +#define MAPSURF_32BIT (3<<7) +#define MAPSURF_422 (5<<7) +#define MAPSURF_COMPRESSED (6<<7) +#define MAPSURF_4BIT_INDEXED (7<<7) +#define MS3_MT_FORMAT_MASK (0x7 << 3) +#define MS3_MT_FORMAT_SHIFT 3 +#define MT_4BIT_IDX_ARGB8888 (7<<3) /* SURFACE_4BIT_INDEXED */ +#define MT_8BIT_I8 (0<<3) /* SURFACE_8BIT */ +#define MT_8BIT_L8 (1<<3) +#define MT_8BIT_A8 (4<<3) +#define MT_8BIT_MONO8 (5<<3) +#define MT_16BIT_RGB565 (0<<3) /* SURFACE_16BIT */ +#define MT_16BIT_ARGB1555 (1<<3) +#define MT_16BIT_ARGB4444 (2<<3) +#define MT_16BIT_AY88 (3<<3) +#define 
MT_16BIT_88DVDU (5<<3) +#define MT_16BIT_BUMP_655LDVDU (6<<3) +#define MT_16BIT_I16 (7<<3) +#define MT_16BIT_L16 (8<<3) +#define MT_16BIT_A16 (9<<3) +#define MT_32BIT_ARGB8888 (0<<3) /* SURFACE_32BIT */ +#define MT_32BIT_ABGR8888 (1<<3) +#define MT_32BIT_XRGB8888 (2<<3) +#define MT_32BIT_XBGR8888 (3<<3) +#define MT_32BIT_QWVU8888 (4<<3) +#define MT_32BIT_AXVU8888 (5<<3) +#define MT_32BIT_LXVU8888 (6<<3) +#define MT_32BIT_XLVU8888 (7<<3) +#define MT_32BIT_ARGB2101010 (8<<3) +#define MT_32BIT_ABGR2101010 (9<<3) +#define MT_32BIT_AWVU2101010 (0xA<<3) +#define MT_32BIT_GR1616 (0xB<<3) +#define MT_32BIT_VU1616 (0xC<<3) +#define MT_32BIT_xI824 (0xD<<3) +#define MT_32BIT_xA824 (0xE<<3) +#define MT_32BIT_xL824 (0xF<<3) +#define MT_422_YCRCB_SWAPY (0<<3) /* SURFACE_422 */ +#define MT_422_YCRCB_NORMAL (1<<3) +#define MT_422_YCRCB_SWAPUV (2<<3) +#define MT_422_YCRCB_SWAPUVY (3<<3) +#define MT_COMPRESS_DXT1 (0<<3) /* SURFACE_COMPRESSED */ +#define MT_COMPRESS_DXT2_3 (1<<3) +#define MT_COMPRESS_DXT4_5 (2<<3) +#define MT_COMPRESS_FXT1 (3<<3) +#define MT_COMPRESS_DXT1_RGB (4<<3) +#define MS3_USE_FENCE_REGS (1<<2) +#define MS3_TILED_SURFACE (1<<1) +#define MS3_TILE_WALK (1<<0) + +/* The pitch is the pitch measured in DWORDS, minus 1 */ +#define MS4_PITCH_SHIFT 21 +#define MS4_CUBE_FACE_ENA_NEGX (1<<20) +#define MS4_CUBE_FACE_ENA_POSX (1<<19) +#define MS4_CUBE_FACE_ENA_NEGY (1<<18) +#define MS4_CUBE_FACE_ENA_POSY (1<<17) +#define MS4_CUBE_FACE_ENA_NEGZ (1<<16) +#define MS4_CUBE_FACE_ENA_POSZ (1<<15) +#define MS4_CUBE_FACE_ENA_MASK (0x3f<<15) +#define MS4_MAX_LOD_SHIFT 9 +#define MS4_MAX_LOD_MASK (0x3f<<9) +#define MS4_MIP_LAYOUT_LEGACY (0<<8) +#define MS4_MIP_LAYOUT_BELOW_LPT (0<<8) +#define MS4_MIP_LAYOUT_RIGHT_LPT (1<<8) +#define MS4_VOLUME_DEPTH_SHIFT 0 +#define MS4_VOLUME_DEPTH_MASK (0xff<<0) + +/* p244. + * The DWORD count is 3 times the number of bits set in SS1_MAPMASK_MASK. 
+ */ +#define _3DSTATE_SAMPLER_STATE (CMD_3D|(0x1d<<24)|(0x1<<16)) + +#define SS1_MAPMASK_SHIFT 0 +#define SS1_MAPMASK_MASK (0x8fff<<0) + +#define SS2_REVERSE_GAMMA_ENABLE (1<<31) +#define SS2_PACKED_TO_PLANAR_ENABLE (1<<30) +#define SS2_COLORSPACE_CONVERSION (1<<29) +#define SS2_CHROMAKEY_SHIFT 27 +#define SS2_BASE_MIP_LEVEL_SHIFT 22 +#define SS2_BASE_MIP_LEVEL_MASK (0x1f<<22) +#define SS2_MIP_FILTER_SHIFT 20 +#define SS2_MIP_FILTER_MASK (0x3<<20) +#define MIPFILTER_NONE 0 +#define MIPFILTER_NEAREST 1 +#define MIPFILTER_LINEAR 3 +#define SS2_MAG_FILTER_SHIFT 17 +#define SS2_MAG_FILTER_MASK (0x7<<17) +#define FILTER_NEAREST 0 +#define FILTER_LINEAR 1 +#define FILTER_ANISOTROPIC 2 +#define FILTER_4X4_1 3 +#define FILTER_4X4_2 4 +#define FILTER_4X4_FLAT 5 +#define FILTER_6X5_MONO 6 /* XXX - check */ +#define SS2_MIN_FILTER_SHIFT 14 +#define SS2_MIN_FILTER_MASK (0x7<<14) +#define SS2_LOD_BIAS_SHIFT 5 +#define SS2_LOD_BIAS_ONE (0x10<<5) +#define SS2_LOD_BIAS_MASK (0x1ff<<5) +/* Shadow requires: + * MT_X8{I,L,A}24 or MT_{I,L,A}16 texture format + * FILTER_4X4_x MIN and MAG filters + */ +#define SS2_SHADOW_ENABLE (1<<4) +#define SS2_MAX_ANISO_MASK (1<<3) +#define SS2_MAX_ANISO_2 (0<<3) +#define SS2_MAX_ANISO_4 (1<<3) +#define SS2_SHADOW_FUNC_SHIFT 0 +#define SS2_SHADOW_FUNC_MASK (0x7<<0) +/* SS2_SHADOW_FUNC values: see COMPAREFUNC_* */ + +#define SS3_MIN_LOD_SHIFT 24 +#define SS3_MIN_LOD_ONE (0x10<<24) +#define SS3_MIN_LOD_MASK (0xff<<24) +#define SS3_KILL_PIXEL_ENABLE (1<<17) +#define SS3_TCX_ADDR_MODE_SHIFT 12 +#define SS3_TCX_ADDR_MODE_MASK (0x7<<12) +#define TEXCOORDMODE_WRAP 0 +#define TEXCOORDMODE_MIRROR 1 +#define TEXCOORDMODE_CLAMP_EDGE 2 +#define TEXCOORDMODE_CUBE 3 +#define TEXCOORDMODE_CLAMP_BORDER 4 +#define TEXCOORDMODE_MIRROR_ONCE 5 +#define SS3_TCY_ADDR_MODE_SHIFT 9 +#define SS3_TCY_ADDR_MODE_MASK (0x7<<9) +#define SS3_TCZ_ADDR_MODE_SHIFT 6 +#define SS3_TCZ_ADDR_MODE_MASK (0x7<<6) +#define SS3_NORMALIZED_COORDS (1<<5) +#define SS3_TEXTUREMAP_INDEX_SHIFT 1 
+#define SS3_TEXTUREMAP_INDEX_MASK (0xf<<1) +#define SS3_DEINTERLACER_ENABLE (1<<0) + +#define SS4_BORDER_COLOR_MASK (~0) + +/* 3DSTATE_SPAN_STIPPLE, p258 + */ +#define _3DSTATE_STIPPLE ((0x3<<29)|(0x1d<<24)|(0x83<<16)) +#define ST1_ENABLE (1<<16) +#define ST1_MASK (0xffff) + +#define FLUSH_MAP_CACHE (1<<0) +#define FLUSH_RENDER_CACHE (1<<1) + +#endif +/* -*- c-basic-offset: 4 -*- */ +/* + * Copyright © 2006,2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +/* Each instruction is 3 dwords long, though most don't require all + * this space. Maximum of 123 instructions. Smaller maxes per insn + * type. 
+ */ +#define _3DSTATE_PIXEL_SHADER_PROGRAM (CMD_3D|(0x1d<<24)|(0x5<<16)) + +#define REG_TYPE_R 0 /* temporary regs, no need to + * dcl, must be written before + * read -- Preserved between + * phases. + */ +#define REG_TYPE_T 1 /* Interpolated values, must be + * dcl'ed before use. + * + * 0..7: texture coord, + * 8: diffuse spec, + * 9: specular color, + * 10: fog parameter in w. + */ +#define REG_TYPE_CONST 2 /* Restriction: only one const + * can be referenced per + * instruction, though it may be + * selected for multiple inputs. + * Constants not initialized + * default to zero. + */ +#define REG_TYPE_S 3 /* sampler */ +#define REG_TYPE_OC 4 /* output color (rgba) */ +#define REG_TYPE_OD 5 /* output depth (w), xyz are + * temporaries. If not written, + * interpolated depth is used? + */ +#define REG_TYPE_U 6 /* unpreserved temporaries */ +#define REG_TYPE_MASK 0x7 +#define REG_TYPE_SHIFT 4 +#define REG_NR_MASK 0xf + +/* REG_TYPE_T: +*/ +#define T_TEX0 0 +#define T_TEX1 1 +#define T_TEX2 2 +#define T_TEX3 3 +#define T_TEX4 4 +#define T_TEX5 5 +#define T_TEX6 6 +#define T_TEX7 7 +#define T_DIFFUSE 8 +#define T_SPECULAR 9 +#define T_FOG_W 10 /* interpolated fog is in W coord */ + +/* Arithmetic instructions */ + +/* .replicate_swizzle == selection and replication of a particular + * scalar channel, ie., .xxxx, .yyyy, .zzzz or .wwww + */ +#define A0_NOP (0x0<<24) /* no operation */ +#define A0_ADD (0x1<<24) /* dst = src0 + src1 */ +#define A0_MOV (0x2<<24) /* dst = src0 */ +#define A0_MUL (0x3<<24) /* dst = src0 * src1 */ +#define A0_MAD (0x4<<24) /* dst = src0 * src1 + src2 */ +#define A0_DP2ADD (0x5<<24) /* dst.xyzw = src0.xy dot src1.xy + src2.replicate_swizzle */ +#define A0_DP3 (0x6<<24) /* dst.xyzw = src0.xyz dot src1.xyz */ +#define A0_DP4 (0x7<<24) /* dst.xyzw = src0.xyzw dot src1.xyzw */ +#define A0_FRC (0x8<<24) /* dst = src0 - floor(src0) */ +#define A0_RCP (0x9<<24) /* dst.xyzw = 1/(src0.replicate_swizzle) */ +#define A0_RSQ (0xa<<24) /* dst.xyzw = 
1/(sqrt(abs(src0.replicate_swizzle))) */ +#define A0_EXP (0xb<<24) /* dst.xyzw = exp2(src0.replicate_swizzle) */ +#define A0_LOG (0xc<<24) /* dst.xyzw = log2(abs(src0.replicate_swizzle)) */ +#define A0_CMP (0xd<<24) /* dst = (src0 >= 0.0) ? src1 : src2 */ +#define A0_MIN (0xe<<24) /* dst = (src0 < src1) ? src0 : src1 */ +#define A0_MAX (0xf<<24) /* dst = (src0 >= src1) ? src0 : src1 */ +#define A0_FLR (0x10<<24) /* dst = floor(src0) */ +#define A0_MOD (0x11<<24) /* dst = src0 fmod 1.0 */ +#define A0_TRC (0x12<<24) /* dst = int(src0) */ +#define A0_SGE (0x13<<24) /* dst = src0 >= src1 ? 1.0 : 0.0 */ +#define A0_SLT (0x14<<24) /* dst = src0 < src1 ? 1.0 : 0.0 */ +#define A0_DEST_SATURATE (1<<22) +#define A0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +#define A0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define A0_DEST_CHANNEL_X (1<<10) +#define A0_DEST_CHANNEL_Y (2<<10) +#define A0_DEST_CHANNEL_Z (4<<10) +#define A0_DEST_CHANNEL_W (8<<10) +#define A0_DEST_CHANNEL_ALL (0xf<<10) +#define A0_DEST_CHANNEL_SHIFT 10 +#define A0_SRC0_TYPE_SHIFT 7 +#define A0_SRC0_NR_SHIFT 2 + +#define A0_DEST_CHANNEL_XY (A0_DEST_CHANNEL_X|A0_DEST_CHANNEL_Y) +#define A0_DEST_CHANNEL_XYZ (A0_DEST_CHANNEL_XY|A0_DEST_CHANNEL_Z) + +#define SRC_X 0 +#define SRC_Y 1 +#define SRC_Z 2 +#define SRC_W 3 +#define SRC_ZERO 4 +#define SRC_ONE 5 + +#define A1_SRC0_CHANNEL_X_NEGATE (1<<31) +#define A1_SRC0_CHANNEL_X_SHIFT 28 +#define A1_SRC0_CHANNEL_Y_NEGATE (1<<27) +#define A1_SRC0_CHANNEL_Y_SHIFT 24 +#define A1_SRC0_CHANNEL_Z_NEGATE (1<<23) +#define A1_SRC0_CHANNEL_Z_SHIFT 20 +#define A1_SRC0_CHANNEL_W_NEGATE (1<<19) +#define A1_SRC0_CHANNEL_W_SHIFT 16 +#define A1_SRC1_TYPE_SHIFT 13 +#define A1_SRC1_NR_SHIFT 8 +#define A1_SRC1_CHANNEL_X_NEGATE (1<<7) +#define A1_SRC1_CHANNEL_X_SHIFT 4 +#define A1_SRC1_CHANNEL_Y_NEGATE (1<<3) +#define A1_SRC1_CHANNEL_Y_SHIFT 0 + +#define A2_SRC1_CHANNEL_Z_NEGATE (1<<31) +#define A2_SRC1_CHANNEL_Z_SHIFT 28 +#define A2_SRC1_CHANNEL_W_NEGATE (1<<27) 
+#define A2_SRC1_CHANNEL_W_SHIFT 24 +#define A2_SRC2_TYPE_SHIFT 21 +#define A2_SRC2_NR_SHIFT 16 +#define A2_SRC2_CHANNEL_X_NEGATE (1<<15) +#define A2_SRC2_CHANNEL_X_SHIFT 12 +#define A2_SRC2_CHANNEL_Y_NEGATE (1<<11) +#define A2_SRC2_CHANNEL_Y_SHIFT 8 +#define A2_SRC2_CHANNEL_Z_NEGATE (1<<7) +#define A2_SRC2_CHANNEL_Z_SHIFT 4 +#define A2_SRC2_CHANNEL_W_NEGATE (1<<3) +#define A2_SRC2_CHANNEL_W_SHIFT 0 + +/* Texture instructions */ +#define T0_TEXLD (0x15<<24) /* Sample texture using predeclared + * sampler and address, and output + * filtered texel data to destination + * register */ +#define T0_TEXLDP (0x16<<24) /* Same as texld but performs a + * perspective divide of the texture + * coordinate .xyz values by .w before + * sampling. */ +#define T0_TEXLDB (0x17<<24) /* Same as texld but biases the + * computed LOD by w. Only S4.6 two's + * comp is used. This implies that a + * float to fixed conversion is + * done. */ +#define T0_TEXKILL (0x18<<24) /* Does not perform a sampling + * operation. Simply kills the pixel + * if any channel of the address + * register is < 0.0. */ +#define T0_DEST_TYPE_SHIFT 19 +/* Allow: R, OC, OD, U */ +/* Note: U (unpreserved) regs do not retain their values between + * phases (cannot be used for feedback) + * + * Note: oC and OD registers can only be used as the destination of a + * texture instruction once per phase (this is an implementation + * restriction). + */ +#define T0_DEST_NR_SHIFT 14 +/* Allow R: 0..15, OC,OD: 0..0, U: 0..2 */ +#define T0_SAMPLER_NR_SHIFT 0 /* This field ignored for TEXKILL */ +#define T0_SAMPLER_NR_MASK (0xf<<0) + +#define T1_ADDRESS_REG_TYPE_SHIFT 24 /* Reg to use as texture coord */ +/* Allow R, T, OC, OD -- R, OC, OD are 'dependent' reads, new program phase */ +#define T1_ADDRESS_REG_NR_SHIFT 17 +#define T2_MBZ 0 + +/* Declaration instructions */ +#define D0_DCL (0x19<<24) /* Declare a t (interpolated attrib) + * register or an s (sampler) + * register. 
*/ +#define D0_SAMPLE_TYPE_SHIFT 22 +#define D0_SAMPLE_TYPE_2D (0x0<<22) +#define D0_SAMPLE_TYPE_CUBE (0x1<<22) +#define D0_SAMPLE_TYPE_VOLUME (0x2<<22) +#define D0_SAMPLE_TYPE_MASK (0x3<<22) + +#define D0_TYPE_SHIFT 19 +/* Allow: T, S */ +#define D0_NR_SHIFT 14 +/* Allow T: 0..10, S: 0..15 */ +#define D0_CHANNEL_X (1<<10) +#define D0_CHANNEL_Y (2<<10) +#define D0_CHANNEL_Z (4<<10) +#define D0_CHANNEL_W (8<<10) +#define D0_CHANNEL_ALL (0xf<<10) +#define D0_CHANNEL_NONE (0<<10) + +#define D0_CHANNEL_XY (D0_CHANNEL_X|D0_CHANNEL_Y) +#define D0_CHANNEL_XYZ (D0_CHANNEL_XY|D0_CHANNEL_Z) + +/* I915 Errata: Do not allow (xz), (xw), (xzw) combinations for diffuse + * or specular declarations. + * + * For T dcls, only allow: (x), (xy), (xyz), (w), (xyzw) + * + * Must be zero for S (sampler) dcls + */ +#define D1_MBZ 0 +#define D2_MBZ 0 + + +/* MASK_* are the unshifted bitmasks of the destination mask in arithmetic + * operations + */ +#define MASK_X 0x1 +#define MASK_Y 0x2 +#define MASK_Z 0x4 +#define MASK_W 0x8 +#define MASK_XYZ (MASK_X | MASK_Y | MASK_Z) +#define MASK_XYZW (MASK_XYZ | MASK_W) +#define MASK_SATURATE 0x10 + +/* Temporary, undeclared regs. Preserved between phases */ +#define FS_R0 ((REG_TYPE_R << REG_TYPE_SHIFT) | 0) +#define FS_R1 ((REG_TYPE_R << REG_TYPE_SHIFT) | 1) +#define FS_R2 ((REG_TYPE_R << REG_TYPE_SHIFT) | 2) +#define FS_R3 ((REG_TYPE_R << REG_TYPE_SHIFT) | 3) + +/* Texture coordinate regs. Must be declared. 
*/ +#define FS_T0 ((REG_TYPE_T << REG_TYPE_SHIFT) | 0) +#define FS_T1 ((REG_TYPE_T << REG_TYPE_SHIFT) | 1) +#define FS_T2 ((REG_TYPE_T << REG_TYPE_SHIFT) | 2) +#define FS_T3 ((REG_TYPE_T << REG_TYPE_SHIFT) | 3) +#define FS_T4 ((REG_TYPE_T << REG_TYPE_SHIFT) | 4) +#define FS_T5 ((REG_TYPE_T << REG_TYPE_SHIFT) | 5) +#define FS_T6 ((REG_TYPE_T << REG_TYPE_SHIFT) | 6) +#define FS_T7 ((REG_TYPE_T << REG_TYPE_SHIFT) | 7) +#define FS_T8 ((REG_TYPE_T << REG_TYPE_SHIFT) | 8) +#define FS_T9 ((REG_TYPE_T << REG_TYPE_SHIFT) | 9) +#define FS_T10 ((REG_TYPE_T << REG_TYPE_SHIFT) | 10) + +/* Constant values */ +#define FS_C0 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 0) +#define FS_C1 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 1) +#define FS_C2 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 2) +#define FS_C3 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 3) +#define FS_C4 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 4) +#define FS_C5 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 5) +#define FS_C6 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 6) +#define FS_C7 ((REG_TYPE_CONST << REG_TYPE_SHIFT) | 7) + +/* Sampler regs */ +#define FS_S0 ((REG_TYPE_S << REG_TYPE_SHIFT) | 0) +#define FS_S1 ((REG_TYPE_S << REG_TYPE_SHIFT) | 1) +#define FS_S2 ((REG_TYPE_S << REG_TYPE_SHIFT) | 2) +#define FS_S3 ((REG_TYPE_S << REG_TYPE_SHIFT) | 3) + +/* Output color */ +#define FS_OC ((REG_TYPE_OC << REG_TYPE_SHIFT) | 0) + +/* Output depth */ +#define FS_OD ((REG_TYPE_OD << REG_TYPE_SHIFT) | 0) + +/* Unpreserved temporary regs */ +#define FS_U0 ((REG_TYPE_U << REG_TYPE_SHIFT) | 0) +#define FS_U1 ((REG_TYPE_U << REG_TYPE_SHIFT) | 1) +#define FS_U2 ((REG_TYPE_U << REG_TYPE_SHIFT) | 2) +#define FS_U3 ((REG_TYPE_U << REG_TYPE_SHIFT) | 3) + +#define X_CHANNEL_SHIFT (REG_TYPE_SHIFT + 3) +#define Y_CHANNEL_SHIFT (X_CHANNEL_SHIFT + 4) +#define Z_CHANNEL_SHIFT (Y_CHANNEL_SHIFT + 4) +#define W_CHANNEL_SHIFT (Z_CHANNEL_SHIFT + 4) + +#define REG_CHANNEL_MASK 0xf + +#define REG_NR(reg) ((reg) & REG_NR_MASK) +#define REG_TYPE(reg) (((reg) >> REG_TYPE_SHIFT) & 
REG_TYPE_MASK) +#define REG_X(reg) (((reg) >> X_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_Y(reg) (((reg) >> Y_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_Z(reg) (((reg) >> Z_CHANNEL_SHIFT) & REG_CHANNEL_MASK) +#define REG_W(reg) (((reg) >> W_CHANNEL_SHIFT) & REG_CHANNEL_MASK) + +enum gen3_fs_channel { + X_CHANNEL_VAL = 0, + Y_CHANNEL_VAL, + Z_CHANNEL_VAL, + W_CHANNEL_VAL, + ZERO_CHANNEL_VAL, + ONE_CHANNEL_VAL, + + NEG_X_CHANNEL_VAL = X_CHANNEL_VAL | 0x8, + NEG_Y_CHANNEL_VAL = Y_CHANNEL_VAL | 0x8, + NEG_Z_CHANNEL_VAL = Z_CHANNEL_VAL | 0x8, + NEG_W_CHANNEL_VAL = W_CHANNEL_VAL | 0x8, + NEG_ONE_CHANNEL_VAL = ONE_CHANNEL_VAL | 0x8 +}; + +#define gen3_fs_operand(reg, x, y, z, w) \ + (reg) | \ +(x##_CHANNEL_VAL << X_CHANNEL_SHIFT) | \ +(y##_CHANNEL_VAL << Y_CHANNEL_SHIFT) | \ +(z##_CHANNEL_VAL << Z_CHANNEL_SHIFT) | \ +(w##_CHANNEL_VAL << W_CHANNEL_SHIFT) + +/** + * Construct an operand description for using a register with no swizzling + */ +#define gen3_fs_operand_reg(reg) \ + gen3_fs_operand(reg, X, Y, Z, W) + +#define gen3_fs_operand_reg_negate(reg) \ + gen3_fs_operand(reg, NEG_X, NEG_Y, NEG_Z, NEG_W) + +/** + * Returns an operand containing (0.0, 0.0, 0.0, 0.0). + */ +#define gen3_fs_operand_zero() gen3_fs_operand(FS_R0, ZERO, ZERO, ZERO, ZERO) + +/** + * Returns an unused operand + */ +#define gen3_fs_operand_none() gen3_fs_operand_zero() + +/** + * Returns an operand containing (1.0, 1.0, 1.0, 1.0). + */ +#define gen3_fs_operand_one() gen3_fs_operand(FS_R0, ONE, ONE, ONE, ONE) + +#define gen3_get_hardware_channel_val(val, shift, negate) \ + (((val & 0x7) << shift) | ((val & 0x8) ? negate : 0)) + +/** + * Outputs a fragment shader command to declare a sampler or texture register. + */ +#define gen3_fs_dcl(reg) \ + do { \ + OUT_BATCH(D0_DCL | \ + (REG_TYPE(reg) << D0_TYPE_SHIFT) | \ + (REG_NR(reg) << D0_NR_SHIFT) | \ + ((REG_TYPE(reg) != REG_TYPE_S) ? 
D0_CHANNEL_ALL : 0)); \ + OUT_BATCH(0); \ + OUT_BATCH(0); \ + } while (0) + +#define gen3_fs_texld(dest_reg, sampler_reg, address_reg) \ + do { \ + OUT_BATCH(T0_TEXLD | \ + (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \ + (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \ + OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \ + (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \ + OUT_BATCH(0); \ + } while (0) + +#define gen3_fs_texldp(dest_reg, sampler_reg, address_reg) \ + do { \ + OUT_BATCH(T0_TEXLDP | \ + (REG_TYPE(dest_reg) << T0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << T0_DEST_NR_SHIFT) | \ + (REG_NR(sampler_reg) << T0_SAMPLER_NR_SHIFT)); \ + OUT_BATCH((REG_TYPE(address_reg) << T1_ADDRESS_REG_TYPE_SHIFT) | \ + (REG_NR(address_reg) << T1_ADDRESS_REG_NR_SHIFT)); \ + OUT_BATCH(0); \ + } while (0) + +#define gen3_fs_arith_masked(op, dest_reg, dest_mask, operand0, operand1, operand2) \ + _gen3_fs_arith_masked(A0_##op, dest_reg, dest_mask, operand0, operand1, operand2) + +#define gen3_fs_arith(op, dest_reg, operand0, operand1, operand2) \ + _gen3_fs_arith(A0_##op, dest_reg, operand0, operand1, operand2) + +#define _gen3_fs_arith_masked(cmd, dest_reg, dest_mask, operand0, operand1, operand2) \ + do { \ + /* Set up destination register and write mask */ \ + OUT_BATCH(cmd | \ + (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << A0_DEST_NR_SHIFT) | \ + (((dest_mask) & ~MASK_SATURATE) << A0_DEST_CHANNEL_SHIFT) | \ + (((dest_mask) & MASK_SATURATE) ? 
A0_DEST_SATURATE : 0) | \ + /* Set up operand 0 */ \ + (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \ + (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \ + OUT_BATCH(gen3_get_hardware_channel_val(REG_X(operand0), \ + A1_SRC0_CHANNEL_X_SHIFT, \ + A1_SRC0_CHANNEL_X_NEGATE) | \ + gen3_get_hardware_channel_val(REG_Y(operand0), \ + A1_SRC0_CHANNEL_Y_SHIFT, \ + A1_SRC0_CHANNEL_Y_NEGATE) | \ + gen3_get_hardware_channel_val(REG_Z(operand0), \ + A1_SRC0_CHANNEL_Z_SHIFT, \ + A1_SRC0_CHANNEL_Z_NEGATE) | \ + gen3_get_hardware_channel_val(REG_W(operand0), \ + A1_SRC0_CHANNEL_W_SHIFT, \ + A1_SRC0_CHANNEL_W_NEGATE) | \ + /* Set up operand 1 */ \ + (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \ + (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \ + gen3_get_hardware_channel_val(REG_X(operand1), \ + A1_SRC1_CHANNEL_X_SHIFT, \ + A1_SRC1_CHANNEL_X_NEGATE) | \ + gen3_get_hardware_channel_val(REG_Y(operand1), \ + A1_SRC1_CHANNEL_Y_SHIFT, \ + A1_SRC1_CHANNEL_Y_NEGATE)); \ + OUT_BATCH(gen3_get_hardware_channel_val(REG_Z(operand1), \ + A2_SRC1_CHANNEL_Z_SHIFT, \ + A2_SRC1_CHANNEL_Z_NEGATE) | \ + gen3_get_hardware_channel_val(REG_W(operand1), \ + A2_SRC1_CHANNEL_W_SHIFT, \ + A2_SRC1_CHANNEL_W_NEGATE) | \ + /* Set up operand 2 */ \ + (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \ + (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \ + gen3_get_hardware_channel_val(REG_X(operand2), \ + A2_SRC2_CHANNEL_X_SHIFT, \ + A2_SRC2_CHANNEL_X_NEGATE) | \ + gen3_get_hardware_channel_val(REG_Y(operand2), \ + A2_SRC2_CHANNEL_Y_SHIFT, \ + A2_SRC2_CHANNEL_Y_NEGATE) | \ + gen3_get_hardware_channel_val(REG_Z(operand2), \ + A2_SRC2_CHANNEL_Z_SHIFT, \ + A2_SRC2_CHANNEL_Z_NEGATE) | \ + gen3_get_hardware_channel_val(REG_W(operand2), \ + A2_SRC2_CHANNEL_W_SHIFT, \ + A2_SRC2_CHANNEL_W_NEGATE)); \ + } while (0) + +#define _gen3_fs_arith(cmd, dest_reg, operand0, operand1, operand2) do {\ + /* Set up destination register and write mask */ \ + OUT_BATCH(cmd | \ + (REG_TYPE(dest_reg) << A0_DEST_TYPE_SHIFT) | \ + (REG_NR(dest_reg) << 
A0_DEST_NR_SHIFT) | \ + (A0_DEST_CHANNEL_ALL) | \ + /* Set up operand 0 */ \ + (REG_TYPE(operand0) << A0_SRC0_TYPE_SHIFT) | \ + (REG_NR(operand0) << A0_SRC0_NR_SHIFT)); \ + OUT_BATCH(gen3_get_hardware_channel_val(REG_X(operand0), \ + A1_SRC0_CHANNEL_X_SHIFT, \ + A1_SRC0_CHANNEL_X_NEGATE) | \ + gen3_get_hardware_channel_val(REG_Y(operand0), \ + A1_SRC0_CHANNEL_Y_SHIFT, \ + A1_SRC0_CHANNEL_Y_NEGATE) | \ + gen3_get_hardware_channel_val(REG_Z(operand0), \ + A1_SRC0_CHANNEL_Z_SHIFT, \ + A1_SRC0_CHANNEL_Z_NEGATE) | \ + gen3_get_hardware_channel_val(REG_W(operand0), \ + A1_SRC0_CHANNEL_W_SHIFT, \ + A1_SRC0_CHANNEL_W_NEGATE) | \ + /* Set up operand 1 */ \ + (REG_TYPE(operand1) << A1_SRC1_TYPE_SHIFT) | \ + (REG_NR(operand1) << A1_SRC1_NR_SHIFT) | \ + gen3_get_hardware_channel_val(REG_X(operand1), \ + A1_SRC1_CHANNEL_X_SHIFT, \ + A1_SRC1_CHANNEL_X_NEGATE) | \ + gen3_get_hardware_channel_val(REG_Y(operand1), \ + A1_SRC1_CHANNEL_Y_SHIFT, \ + A1_SRC1_CHANNEL_Y_NEGATE)); \ + OUT_BATCH(gen3_get_hardware_channel_val(REG_Z(operand1), \ + A2_SRC1_CHANNEL_Z_SHIFT, \ + A2_SRC1_CHANNEL_Z_NEGATE) | \ + gen3_get_hardware_channel_val(REG_W(operand1), \ + A2_SRC1_CHANNEL_W_SHIFT, \ + A2_SRC1_CHANNEL_W_NEGATE) | \ + /* Set up operand 2 */ \ + (REG_TYPE(operand2) << A2_SRC2_TYPE_SHIFT) | \ + (REG_NR(operand2) << A2_SRC2_NR_SHIFT) | \ + gen3_get_hardware_channel_val(REG_X(operand2), \ + A2_SRC2_CHANNEL_X_SHIFT, \ + A2_SRC2_CHANNEL_X_NEGATE) | \ + gen3_get_hardware_channel_val(REG_Y(operand2), \ + A2_SRC2_CHANNEL_Y_SHIFT, \ + A2_SRC2_CHANNEL_Y_NEGATE) | \ + gen3_get_hardware_channel_val(REG_Z(operand2), \ + A2_SRC2_CHANNEL_Z_SHIFT, \ + A2_SRC2_CHANNEL_Z_NEGATE) | \ + gen3_get_hardware_channel_val(REG_W(operand2), \ + A2_SRC2_CHANNEL_W_SHIFT, \ + A2_SRC2_CHANNEL_W_NEGATE)); \ +} while (0) + +#define gen3_fs_mov(dest_reg, operand0) \ + gen3_fs_arith(MOV, dest_reg, \ + operand0, \ + gen3_fs_operand_none(), \ + gen3_fs_operand_none()) + +#define gen3_fs_mov_masked(dest_reg, dest_mask, operand0) \ 
+ gen3_fs_arith_masked (MOV, dest_reg, dest_mask, \ + operand0, \ + gen3_fs_operand_none(), \ + gen3_fs_operand_none()) + + +#define gen3_fs_frc(dest_reg, operand0) \ + gen3_fs_arith (FRC, dest_reg, \ + operand0, \ + gen3_fs_operand_none(), \ + gen3_fs_operand_none()) + +/** Add operand0 and operand1 and put the result in dest_reg */ +#define gen3_fs_add(dest_reg, operand0, operand1) \ + gen3_fs_arith (ADD, dest_reg, \ + operand0, operand1, \ + gen3_fs_operand_none()) + +/** Multiply operand0 and operand1 and put the result in dest_reg */ +#define gen3_fs_mul(dest_reg, operand0, operand1) \ + gen3_fs_arith (MUL, dest_reg, \ + operand0, operand1, \ + gen3_fs_operand_none()) + +/** Computes 1/(operand0.replicate_swizzle) puts the result in dest_reg */ +#define gen3_fs_rcp(dest_reg, dest_mask, operand0) \ + do { \ + if (dest_mask) { \ + gen3_fs_arith_masked (RCP, dest_reg, dest_mask, \ + operand0, \ + gen3_fs_operand_none (), \ + gen3_fs_operand_none ()); \ + } else { \ + gen3_fs_arith (RCP, dest_reg, \ + operand0, \ + gen3_fs_operand_none (), \ + gen3_fs_operand_none ()); \ + } \ + } while (0) + +/** Computes 1/sqrt(operand0.replicate_swizzle) puts the result in dest_reg */ +#define gen3_fs_rsq(dest_reg, dest_mask, operand0) \ + do { \ + if (dest_mask) { \ + gen3_fs_arith_masked (RSQ, dest_reg, dest_mask, \ + operand0, \ + gen3_fs_operand_none (), \ + gen3_fs_operand_none ()); \ + } else { \ + gen3_fs_arith (RSQ, dest_reg, \ + operand0, \ + gen3_fs_operand_none (), \ + gen3_fs_operand_none ()); \ + } \ + } while (0) + +/** Puts the minimum of operand0 and operand1 in dest_reg */ +#define gen3_fs_min(dest_reg, operand0, operand1) \ + gen3_fs_arith (MIN, dest_reg, \ + operand0, operand1, \ + gen3_fs_operand_none()) + +/** Puts the maximum of operand0 and operand1 in dest_reg */ +#define gen3_fs_max(dest_reg, operand0, operand1) \ + gen3_fs_arith (MAX, dest_reg, \ + operand0, operand1, \ + gen3_fs_operand_none()) + +#define gen3_fs_cmp(dest_reg, operand0, operand1, 
operand2) \ + gen3_fs_arith (CMP, dest_reg, operand0, operand1, operand2) + +/** Perform operand0 * operand1 + operand2 and put the result in dest_reg */ +#define gen3_fs_mad(dest_reg, dest_mask, op0, op1, op2) \ + do { \ + if (dest_mask) { \ + gen3_fs_arith_masked (MAD, dest_reg, dest_mask, op0, op1, op2); \ + } else { \ + gen3_fs_arith (MAD, dest_reg, op0, op1, op2); \ + } \ + } while (0) + +#define gen3_fs_dp2add(dest_reg, dest_mask, op0, op1, op2) \ + do { \ + if (dest_mask) { \ + gen3_fs_arith_masked (DP2ADD, dest_reg, dest_mask, op0, op1, op2); \ + } else { \ + gen3_fs_arith (DP2ADD, dest_reg, op0, op1, op2); \ + } \ + } while (0) + +/** + * Perform a 3-component dot-product of operand0 and operand1 and put the + * resulting scalar in the channels of dest_reg specified by the dest_mask. + */ +#define gen3_fs_dp3(dest_reg, dest_mask, op0, op1) \ + do { \ + if (dest_mask) { \ + gen3_fs_arith_masked (DP3, dest_reg, dest_mask, \ + op0, op1,\ + gen3_fs_operand_none()); \ + } else { \ + gen3_fs_arith (DP3, dest_reg, op0, op1,\ + gen3_fs_operand_none()); \ + } \ + } while (0) + +/** + * Perform a 4-component dot-product of operand0 and operand1 and put the + * resulting scalar in the channels of dest_reg specified by the dest_mask. + */ +#define gen3_fs_dp4(dest_reg, dest_mask, op0, op1) \ + do { \ + if (dest_mask) { \ + gen3_fs_arith_masked (DP4, dest_reg, dest_mask, \ + op0, op1,\ + gen3_fs_operand_none()); \ + } else { \ + gen3_fs_arith (DP4, dest_reg, op0, op1,\ + gen3_fs_operand_none()); \ + } \ + } while (0) + +#define SHADER_TRAPEZOIDS (1 << 24) diff --git a/src/sna/gen4_render.c b/src/sna/gen4_render.c new file mode 100644 index 00000000..82fef2d6 --- /dev/null +++ b/src/sna/gen4_render.c @@ -0,0 +1,2762 @@ +/* + * Copyright © 2006,2008,2011 Intel Corporation + * Copyright © 2007 Red Hat, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@sna.com> + * Eric Anholt <eric@anholt.net> + * Carl Worth <cworth@redhat.com> + * Keith Packard <keithp@keithp.com> + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <xf86.h> + +#include "sna.h" +#include "sna_reg.h" +#include "sna_render.h" +#include "sna_render_inline.h" +#include "sna_video.h" + +#include "gen4_render.h" + +#if DEBUG_RENDER +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +/* gen4 has a serious issue with its shaders that we need to flush + * after every rectangle... So until that is resolved, prefer + * the BLT engine. 
+ */ +#define PREFER_BLT 1 + +#define FLUSH() do { \ + gen4_vertex_flush(sna); \ + OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); \ +} while (0) + +#define GEN4_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) + +/* Set up a default static partitioning of the URB, which is supposed to + * allow anything we would want to do, at potentially lower performance. + */ +#define URB_CS_ENTRY_SIZE 1 +#define URB_CS_ENTRIES 0 + +#define URB_VS_ENTRY_SIZE 1 // each 512-bit row +#define URB_VS_ENTRIES 8 // we needs at least 8 entries + +#define URB_GS_ENTRY_SIZE 0 +#define URB_GS_ENTRIES 0 + +#define URB_CLIP_ENTRY_SIZE 0 +#define URB_CLIP_ENTRIES 0 + +#define URB_SF_ENTRY_SIZE 2 +#define URB_SF_ENTRIES 1 + +/* + * this program computes dA/dx and dA/dy for the texture coordinates along + * with the base texture coordinate. It was extracted from the Mesa driver + */ + +#define SF_KERNEL_NUM_GRF 16 +#define SF_MAX_THREADS 2 + +#define PS_KERNEL_NUM_GRF 32 +#define PS_MAX_THREADS 48 + +static const uint32_t sf_kernel[][4] = { +#include "exa_sf.g4b" +}; + +static const uint32_t sf_kernel_mask[][4] = { +#include "exa_sf_mask.g4b" +}; + +static const uint32_t ps_kernel_nomask_affine[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_nomask_projective[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_projective.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_maskca_affine[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_mask_affine.g4b" +#include "exa_wm_mask_sample_argb.g4b" +#include "exa_wm_ca.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_maskca_projective[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_projective.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include 
"exa_wm_mask_projective.g4b" +#include "exa_wm_mask_sample_argb.g4b" +#include "exa_wm_ca.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_a.g4b" +#include "exa_wm_mask_affine.g4b" +#include "exa_wm_mask_sample_argb.g4b" +#include "exa_wm_ca_srcalpha.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_projective.g4b" +#include "exa_wm_src_sample_a.g4b" +#include "exa_wm_mask_projective.g4b" +#include "exa_wm_mask_sample_argb.g4b" +#include "exa_wm_ca_srcalpha.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_masknoca_affine[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_mask_affine.g4b" +#include "exa_wm_mask_sample_a.g4b" +#include "exa_wm_noca.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_masknoca_projective[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_projective.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_mask_projective.g4b" +#include "exa_wm_mask_sample_a.g4b" +#include "exa_wm_noca.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_packed_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_argb.g4b" +#include "exa_wm_yuv_rgb.g4b" +#include "exa_wm_write.g4b" +}; + +static const uint32_t ps_kernel_planar_static[][4] = { +#include "exa_wm_xy.g4b" +#include "exa_wm_src_affine.g4b" +#include "exa_wm_src_sample_planar.g4b" +#include "exa_wm_yuv_rgb.g4b" +#include "exa_wm_write.g4b" +}; + +#define KERNEL(kernel_enum, kernel, masked) \ + [kernel_enum] = {&kernel, sizeof(kernel), masked} +static const struct wm_kernel_info { + const void *data; + unsigned int size; + Bool has_mask; +} 
wm_kernels[] = { + KERNEL(WM_KERNEL, ps_kernel_nomask_affine, FALSE), + KERNEL(WM_KERNEL_PROJECTIVE, ps_kernel_nomask_projective, FALSE), + + KERNEL(WM_KERNEL_MASK, ps_kernel_masknoca_affine, TRUE), + KERNEL(WM_KERNEL_MASK_PROJECTIVE, ps_kernel_masknoca_projective, TRUE), + + KERNEL(WM_KERNEL_MASKCA, ps_kernel_maskca_affine, TRUE), + KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, ps_kernel_maskca_projective, TRUE), + + KERNEL(WM_KERNEL_MASKCA_SRCALPHA, + ps_kernel_maskca_srcalpha_affine, TRUE), + KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + ps_kernel_maskca_srcalpha_projective, TRUE), + + KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, FALSE), + KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, FALSE), +}; +#undef KERNEL + +static const struct blendinfo { + Bool src_alpha; + uint32_t src_blend; + uint32_t dst_blend; +} gen4_blend_op[] = { + /* Clear */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ZERO}, + /* Src */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ZERO}, + /* Dst */ {0, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_ONE}, + /* Over */ {1, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_INV_SRC_ALPHA}, + /* OverReverse */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ONE}, + /* In */ {0, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_ZERO}, + /* InReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_SRC_ALPHA}, + /* Out */ {0, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_ZERO}, + /* OutReverse */ {1, GEN4_BLENDFACTOR_ZERO, GEN4_BLENDFACTOR_INV_SRC_ALPHA}, + /* Atop */ {1, GEN4_BLENDFACTOR_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA}, + /* AtopReverse */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_SRC_ALPHA}, + /* Xor */ {1, GEN4_BLENDFACTOR_INV_DST_ALPHA, GEN4_BLENDFACTOR_INV_SRC_ALPHA}, + /* Add */ {0, GEN4_BLENDFACTOR_ONE, GEN4_BLENDFACTOR_ONE}, +}; + +/** + * Highest-valued BLENDFACTOR used in gen4_blend_op. 
+ * + * This leaves out GEN4_BLENDFACTOR_INV_DST_COLOR, + * GEN4_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, + * GEN4_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} + */ +#define GEN4_BLENDFACTOR_COUNT (GEN4_BLENDFACTOR_INV_DST_ALPHA + 1) + +static const struct formatinfo { + CARD32 pict_fmt; + uint32_t card_fmt; +} gen4_tex_formats[] = { + {PICT_a8, GEN4_SURFACEFORMAT_A8_UNORM}, + {PICT_a8r8g8b8, GEN4_SURFACEFORMAT_B8G8R8A8_UNORM}, + {PICT_x8r8g8b8, GEN4_SURFACEFORMAT_B8G8R8X8_UNORM}, + {PICT_a8b8g8r8, GEN4_SURFACEFORMAT_R8G8B8A8_UNORM}, + {PICT_x8b8g8r8, GEN4_SURFACEFORMAT_R8G8B8X8_UNORM}, + {PICT_r8g8b8, GEN4_SURFACEFORMAT_R8G8B8_UNORM}, + {PICT_r5g6b5, GEN4_SURFACEFORMAT_B5G6R5_UNORM}, + {PICT_a1r5g5b5, GEN4_SURFACEFORMAT_B5G5R5A1_UNORM}, + {PICT_a2r10g10b10, GEN4_SURFACEFORMAT_B10G10R10A2_UNORM}, + {PICT_x2r10g10b10, GEN4_SURFACEFORMAT_B10G10R10X2_UNORM}, + {PICT_a2b10g10r10, GEN4_SURFACEFORMAT_R10G10B10A2_UNORM}, + {PICT_x2r10g10b10, GEN4_SURFACEFORMAT_B10G10R10X2_UNORM}, + {PICT_a4r4g4b4, GEN4_SURFACEFORMAT_B4G4R4A4_UNORM}, +}; + +#define BLEND_OFFSET(s, d) \ + (((s) * GEN4_BLENDFACTOR_COUNT + (d)) * 64) + +#define SAMPLER_OFFSET(sf, se, mf, me, k) \ + ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64) + +static bool +gen4_emit_pipelined_pointers(struct sna *sna, + const struct sna_composite_op *op, + int blend, int kernel); + +#define OUT_BATCH(v) batch_emit(sna, v) +#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) +#define OUT_VERTEX_F(v) vertex_emit(sna, v) + +static int +gen4_choose_composite_kernel(int op, Bool has_mask, Bool is_ca, Bool is_affine) +{ + int base; + + if (has_mask) { + if (is_ca) { + if (gen4_blend_op[op].src_alpha) + base = WM_KERNEL_MASKCA_SRCALPHA; + else + base = WM_KERNEL_MASKCA; + } else + base = WM_KERNEL_MASK; + } else + base = WM_KERNEL; + + return base + !is_affine; +} + +static void gen4_magic_ca_pass(struct sna *sna, + const struct sna_composite_op *op) +{ + struct gen4_render_state *state 
= &sna->render_state.gen4; + + if (!op->need_magic_ca_pass) + return; + + DBG(("%s: CA fixup\n", __FUNCTION__)); + + gen4_emit_pipelined_pointers + (sna, op, PictOpAdd, + gen4_choose_composite_kernel(PictOpAdd, + TRUE, TRUE, op->is_affine)); + + OUT_BATCH(GEN4_3DPRIMITIVE | + GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | + 4); + OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); + OUT_BATCH(sna->render.vertex_start); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + + state->last_primitive = sna->kgem.nbatch; +} + +static void gen4_vertex_flush(struct sna *sna) +{ + if (sna->render_state.gen4.vertex_offset == 0) + return; + + DBG(("%s[%x] = %d\n", __FUNCTION__, + 4*sna->render_state.gen4.vertex_offset, + sna->render.vertex_index - sna->render.vertex_start)); + sna->kgem.batch[sna->render_state.gen4.vertex_offset] = + sna->render.vertex_index - sna->render.vertex_start; + sna->render_state.gen4.vertex_offset = 0; + + if (sna->render.op) + gen4_magic_ca_pass(sna, sna->render.op); +} + +static void gen4_vertex_finish(struct sna *sna, Bool last) +{ + struct kgem_bo *bo; + int i, delta; + + gen4_vertex_flush(sna); + if (!sna->render.vertex_used) + return; + + /* Note: we only need dword alignment (currently) */ + + if (last && sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) { + DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, + sna->render.vertex_used, sna->kgem.nbatch)); + memcpy(sna->kgem.batch + sna->kgem.nbatch, + sna->render.vertex_data, + sna->render.vertex_used * 4); + delta = sna->kgem.nbatch * 4; + bo = NULL; + sna->kgem.nbatch += sna->render.vertex_used; + } else { + bo = kgem_create_linear(&sna->kgem, 4*sna->render.vertex_used); + if (bo && !kgem_bo_write(&sna->kgem, bo, + sna->render.vertex_data, + 4*sna->render.vertex_used)) { + kgem_bo_destroy(&sna->kgem, bo); + return; + } + 
delta = 0; + DBG(("%s: new vbo: %d\n", __FUNCTION__, + sna->render.vertex_used)); + } + + for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) { + if (sna->render.vertex_reloc[i]) { + DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, + i, sna->render.vertex_reloc[i])); + + sna->kgem.batch[sna->render.vertex_reloc[i]] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i], + bo, + I915_GEM_DOMAIN_VERTEX << 16, + delta); + sna->render.vertex_reloc[i] = 0; + } + } + + if (bo) + kgem_bo_destroy(&sna->kgem, bo); + + sna->render.vertex_used = 0; + sna->render.vertex_index = 0; + sna->render_state.gen4.vb_id = 0; +} + +static uint32_t gen4_get_blend(int op, + Bool has_component_alpha, + uint32_t dst_format) +{ + uint32_t src, dst; + + src = gen4_blend_op[op].src_blend; + dst = gen4_blend_op[op].dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that we'll treat + * it as always 1. + */ + if (PICT_FORMAT_A(dst_format) == 0) { + if (src == GEN4_BLENDFACTOR_DST_ALPHA) + src = GEN4_BLENDFACTOR_ONE; + else if (src == GEN4_BLENDFACTOR_INV_DST_ALPHA) + src = GEN4_BLENDFACTOR_ZERO; + } + + /* If the source alpha is being used, then we should only be in a + * case where the source blend factor is 0, and the source blend + * value is the mask channels multiplied by the source picture's alpha. 
+ */ + if (has_component_alpha && gen4_blend_op[op].src_alpha) { + if (dst == GEN4_BLENDFACTOR_SRC_ALPHA) + dst = GEN4_BLENDFACTOR_SRC_COLOR; + else if (dst == GEN4_BLENDFACTOR_INV_SRC_ALPHA) + dst = GEN4_BLENDFACTOR_INV_SRC_COLOR; + } + + DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", + op, dst_format, PICT_FORMAT_A(dst_format), + src, dst, BLEND_OFFSET(src, dst))); + return BLEND_OFFSET(src, dst); +} + +static uint32_t gen4_get_dest_format(PictFormat format) +{ + switch (format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + default: + return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + return GEN4_SURFACEFORMAT_R8G8B8A8_UNORM; + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM; + case PICT_r5g6b5: + return GEN4_SURFACEFORMAT_B5G6R5_UNORM; + case PICT_x1r5g5b5: + case PICT_a1r5g5b5: + return GEN4_SURFACEFORMAT_B5G5R5A1_UNORM; + case PICT_a8: + return GEN4_SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return GEN4_SURFACEFORMAT_B4G4R4A4_UNORM; + } +} + +static Bool gen4_check_dst_format(PictFormat format) +{ + switch (format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + case PICT_r5g6b5: + case PICT_x1r5g5b5: + case PICT_a1r5g5b5: + case PICT_a8: + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return TRUE; + } + return FALSE; +} + +static bool gen4_format_is_dst(uint32_t format) +{ + switch (format) { + case GEN4_SURFACEFORMAT_B8G8R8A8_UNORM: + case GEN4_SURFACEFORMAT_R8G8B8A8_UNORM: + case GEN4_SURFACEFORMAT_B10G10R10A2_UNORM: + case GEN4_SURFACEFORMAT_B5G6R5_UNORM: + case GEN4_SURFACEFORMAT_B5G5R5A1_UNORM: + case GEN4_SURFACEFORMAT_A8_UNORM: + case GEN4_SURFACEFORMAT_B4G4R4A4_UNORM: + return true; + default: + return false; + } +} + +typedef struct gen4_surface_state_padded { + struct gen4_surface_state state; + char pad[32 - sizeof(struct 
gen4_surface_state)]; +} gen4_surface_state_padded; + +static void null_create(struct sna_static_stream *stream) +{ + /* A bunch of zeros useful for legacy border color and depth-stencil */ + sna_static_stream_map(stream, 64, 64); +} + +static void +sampler_state_init(struct gen4_sampler_state *sampler_state, + sampler_filter_t filter, + sampler_extend_t extend) +{ + sampler_state->ss0.lod_preclamp = 1; /* GL mode */ + + /* We use the legacy mode to get the semantics specified by + * the Render extension. */ + sampler_state->ss0.border_color_mode = GEN4_BORDER_COLOR_MODE_LEGACY; + + switch (filter) { + default: + case SAMPLER_FILTER_NEAREST: + sampler_state->ss0.min_filter = GEN4_MAPFILTER_NEAREST; + sampler_state->ss0.mag_filter = GEN4_MAPFILTER_NEAREST; + break; + case SAMPLER_FILTER_BILINEAR: + sampler_state->ss0.min_filter = GEN4_MAPFILTER_LINEAR; + sampler_state->ss0.mag_filter = GEN4_MAPFILTER_LINEAR; + break; + } + + switch (extend) { + default: + case SAMPLER_EXTEND_NONE: + sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP_BORDER; + break; + case SAMPLER_EXTEND_REPEAT: + sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_WRAP; + sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_WRAP; + sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_WRAP; + break; + case SAMPLER_EXTEND_PAD: + sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_CLAMP; + sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_CLAMP; + sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_CLAMP; + break; + case SAMPLER_EXTEND_REFLECT: + sampler_state->ss1.r_wrap_mode = GEN4_TEXCOORDMODE_MIRROR; + sampler_state->ss1.s_wrap_mode = GEN4_TEXCOORDMODE_MIRROR; + sampler_state->ss1.t_wrap_mode = GEN4_TEXCOORDMODE_MIRROR; + break; + } +} + +static uint32_t gen4_get_card_format(PictFormat format) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(gen4_tex_formats); i++) { + if 
(gen4_tex_formats[i].pict_fmt == format) + return gen4_tex_formats[i].card_fmt; + } + return -1; +} + +static uint32_t gen4_filter(uint32_t filter) +{ + switch (filter) { + default: + assert(0); + case PictFilterNearest: + return SAMPLER_FILTER_NEAREST; + case PictFilterBilinear: + return SAMPLER_FILTER_BILINEAR; + } +} + +static uint32_t gen4_check_filter(PicturePtr picture) +{ + switch (picture->filter) { + case PictFilterNearest: + case PictFilterBilinear: + return TRUE; + default: + return FALSE; + } +} + +static uint32_t gen4_repeat(uint32_t repeat) +{ + switch (repeat) { + default: + assert(0); + case RepeatNone: + return SAMPLER_EXTEND_NONE; + case RepeatNormal: + return SAMPLER_EXTEND_REPEAT; + case RepeatPad: + return SAMPLER_EXTEND_PAD; + case RepeatReflect: + return SAMPLER_EXTEND_REFLECT; + } +} + +static bool gen4_check_repeat(PicturePtr picture) +{ + if (!picture->repeat) + return TRUE; + + switch (picture->repeatType) { + case RepeatNone: + case RepeatNormal: + case RepeatPad: + case RepeatReflect: + return TRUE; + default: + return FALSE; + } +} + +/** + * Sets up the common fields for a surface state buffer for the given + * picture in the given surface state buffer. 
+ */ +static int +gen4_bind_bo(struct sna *sna, + struct kgem_bo *bo, + uint32_t width, + uint32_t height, + uint32_t format, + Bool is_dst) +{ + struct gen4_surface_state *ss; + uint32_t domains; + uint16_t offset; + + /* After the first bind, we manage the cache domains within the batch */ + if (is_dst) { + domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER; + kgem_bo_mark_dirty(bo); + } else { + domains = I915_GEM_DOMAIN_SAMPLER << 16; + is_dst = gen4_format_is_dst(format); + } + + offset = sna->kgem.surface - sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); + offset *= sizeof(uint32_t); + + if (is_dst) { + if (bo->dst_bound) + return bo->dst_bound; + + bo->dst_bound = offset; + } else { + if (bo->src_bound) + return bo->src_bound; + + bo->src_bound = offset; + } + + sna->kgem.surface -= + sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); + ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); + + ss->ss0.surface_type = GEN4_SURFACE_2D; + ss->ss0.surface_format = format; + + ss->ss0.data_return_format = GEN4_SURFACERETURNFORMAT_FLOAT32; + ss->ss0.color_blend = 1; + ss->ss1.base_addr = + kgem_add_reloc(&sna->kgem, + sna->kgem.surface + 1, + bo, domains, 0); + + ss->ss2.height = height - 1; + ss->ss2.width = width - 1; + ss->ss3.pitch = bo->pitch - 1; + ss->ss3.tiled_surface = bo->tiling != I915_TILING_NONE; + ss->ss3.tile_walk = bo->tiling == I915_TILING_Y; + + DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", + offset, bo->handle, ss->ss1.base_addr, + ss->ss0.surface_format, width, height, bo->pitch, bo->tiling, + domains & 0xffff ? 
"render" : "sampler")); + + return offset; +} + +fastcall static void +gen4_emit_composite_primitive_solid(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = 1.; + v[2] = 1.; + + dst.p.x = r->dst.x; + v[3] = dst.f; + v[4] = 0.; + v[5] = 1.; + + dst.p.y = r->dst.y; + v[6] = dst.f; + v[7] = 0.; + v[8] = 0.; +} + +fastcall static void +gen4_emit_composite_primitive_identity_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + const float *sf = op->src.scale; + float sx, sy, *v; + union { + struct sna_coordinate p; + float f; + } dst; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 9; + + sx = r->src.x + op->src.offset[0]; + sy = r->src.y + op->src.offset[1]; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = (sx + r->width) * sf[0]; + v[2] = (sy + r->height) * sf[1]; + + dst.p.x = r->dst.x; + v[3] = dst.f; + v[4] = sx * sf[0]; + v[5] = v[2]; + + dst.p.y = r->dst.y; + v[6] = dst.f; + v[7] = v[4]; + v[8] = sy * sf[1]; +} + +fastcall static void +gen4_emit_composite_primitive_affine_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width, + op->src.offset[1] + r->src.y + r->height, + op->src.transform, + &v[1], &v[2]); + v[1] *= op->src.scale[0]; + v[2] *= 
op->src.scale[1]; + + dst.p.x = r->dst.x; + v[3] = dst.f; + _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, + op->src.offset[1] + r->src.y + r->height, + op->src.transform, + &v[4], &v[5]); + v[4] *= op->src.scale[0]; + v[5] *= op->src.scale[1]; + + dst.p.y = r->dst.y; + v[6] = dst.f; + _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, + op->src.offset[1] + r->src.y, + op->src.transform, + &v[7], &v[8]); + v[7] *= op->src.scale[0]; + v[8] *= op->src.scale[1]; +} + +fastcall static void +gen4_emit_composite_primitive_identity_source_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float src_x, src_y; + float msk_x, msk_y; + float w, h; + float *v; + + src_x = r->src.x + op->src.offset[0]; + src_y = r->src.y + op->src.offset[1]; + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 15; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = (src_x + w) * op->src.scale[0]; + v[2] = (src_y + h) * op->src.scale[1]; + v[3] = (msk_x + w) * op->mask.scale[0]; + v[4] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[5] = dst.f; + v[6] = src_x * op->src.scale[0]; + v[7] = v[2]; + v[8] = msk_x * op->mask.scale[0]; + v[9] = v[4]; + + dst.p.y = r->dst.y; + v[10] = dst.f; + v[11] = v[6]; + v[12] = src_y * op->src.scale[1]; + v[13] = v[8]; + v[14] = msk_y * op->mask.scale[1]; +} + +fastcall static void +gen4_emit_composite_primitive(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; + Bool is_affine = op->is_affine; + const float *src_sf = op->src.scale; + const float *mask_sf = op->mask.scale; + + if (is_affine) { + 
sna_get_transformed_coordinates(r->src.x + op->src.offset[0], + r->src.y + op->src.offset[1], + op->src.transform, + &src_x[0], + &src_y[0]); + + sna_get_transformed_coordinates(r->src.x + op->src.offset[0], + r->src.y + op->src.offset[1] + r->height, + op->src.transform, + &src_x[1], + &src_y[1]); + + sna_get_transformed_coordinates(r->src.x + op->src.offset[0] + r->width, + r->src.y + op->src.offset[1] + r->height, + op->src.transform, + &src_x[2], + &src_y[2]); + } else { + if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0], + r->src.y + op->src.offset[1], + op->src.transform, + &src_x[0], + &src_y[0], + &src_w[0])) + return; + + if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0], + r->src.y + op->src.offset[1] + r->height, + op->src.transform, + &src_x[1], + &src_y[1], + &src_w[1])) + return; + + if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0] + r->width, + r->src.y + op->src.offset[1] + r->height, + op->src.transform, + &src_x[2], + &src_y[2], + &src_w[2])) + return; + } + + if (op->mask.bo) { + if (is_affine) { + sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0], + r->mask.y + op->mask.offset[1], + op->mask.transform, + &mask_x[0], + &mask_y[0]); + + sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0], + r->mask.y + op->mask.offset[1] + r->height, + op->mask.transform, + &mask_x[1], + &mask_y[1]); + + sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0] + r->width, + r->mask.y + op->mask.offset[1] + r->height, + op->mask.transform, + &mask_x[2], + &mask_y[2]); + } else { + if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0], + r->mask.y + op->mask.offset[1], + op->mask.transform, + &mask_x[0], + &mask_y[0], + &mask_w[0])) + return; + + if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0], + r->mask.y + op->mask.offset[1] + r->height, + op->mask.transform, + &mask_x[1], + &mask_y[1], + &mask_w[1])) + return; + + if 
(!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0] + r->width, + r->mask.y + op->mask.offset[1] + r->height, + op->mask.transform, + &mask_x[2], + &mask_y[2], + &mask_w[2])) + return; + } + } + + OUT_VERTEX(r->dst.x + r->width, r->dst.y + r->height); + OUT_VERTEX_F(src_x[2] * src_sf[0]); + OUT_VERTEX_F(src_y[2] * src_sf[1]); + if (!is_affine) + OUT_VERTEX_F(src_w[2]); + if (op->mask.bo) { + OUT_VERTEX_F(mask_x[2] * mask_sf[0]); + OUT_VERTEX_F(mask_y[2] * mask_sf[1]); + if (!is_affine) + OUT_VERTEX_F(mask_w[2]); + } + + OUT_VERTEX(r->dst.x, r->dst.y + r->height); + OUT_VERTEX_F(src_x[1] * src_sf[0]); + OUT_VERTEX_F(src_y[1] * src_sf[1]); + if (!is_affine) + OUT_VERTEX_F(src_w[1]); + if (op->mask.bo) { + OUT_VERTEX_F(mask_x[1] * mask_sf[0]); + OUT_VERTEX_F(mask_y[1] * mask_sf[1]); + if (!is_affine) + OUT_VERTEX_F(mask_w[1]); + } + + OUT_VERTEX(r->dst.x, r->dst.y); + OUT_VERTEX_F(src_x[0] * src_sf[0]); + OUT_VERTEX_F(src_y[0] * src_sf[1]); + if (!is_affine) + OUT_VERTEX_F(src_w[0]); + if (op->mask.bo) { + OUT_VERTEX_F(mask_x[0] * mask_sf[0]); + OUT_VERTEX_F(mask_y[0] * mask_sf[1]); + if (!is_affine) + OUT_VERTEX_F(mask_w[0]); + } +} + +static void gen4_emit_vertex_buffer(struct sna *sna, + const struct sna_composite_op *op) +{ + int id = op->u.gen4.ve_id; + + OUT_BATCH(GEN4_3DSTATE_VERTEX_BUFFERS | 3); + OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA | + (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); + sna->render.vertex_reloc[id] = sna->kgem.nbatch; + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + sna->render_state.gen4.vb_id |= 1 << id; +} + +static void gen4_emit_primitive(struct sna *sna) +{ + if (sna->kgem.nbatch == sna->render_state.gen4.last_primitive) { + sna->render_state.gen4.vertex_offset = sna->kgem.nbatch - 5; + return; + } + + OUT_BATCH(GEN4_3DPRIMITIVE | + GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | + 4); + sna->render_state.gen4.vertex_offset = 
sna->kgem.nbatch; + OUT_BATCH(0); /* vertex count, to be filled in later */ + OUT_BATCH(sna->render.vertex_index); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + sna->render.vertex_start = sna->render.vertex_index; + + sna->render_state.gen4.last_primitive = sna->kgem.nbatch; +} + +static bool gen4_rectangle_begin(struct sna *sna, + const struct sna_composite_op *op) +{ + int id = op->u.gen4.ve_id; + int ndwords; + + ndwords = 0; + if ((sna->render_state.gen4.vb_id & (1 << id)) == 0) + ndwords += 5; + if (sna->render_state.gen4.vertex_offset == 0) + ndwords += op->need_magic_ca_pass ? 20 : 6; + if (ndwords == 0) + return true; + + if (!kgem_check_batch(&sna->kgem, ndwords)) + return false; + + if ((sna->render_state.gen4.vb_id & (1 << id)) == 0) + gen4_emit_vertex_buffer(sna, op); + if (sna->render_state.gen4.vertex_offset == 0) + gen4_emit_primitive(sna); + + return true; +} + +static int gen4_get_rectangles__flush(struct sna *sna) +{ + if (!kgem_check_batch(&sna->kgem, 25)) + return 0; + if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - 1) + return 0; + if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - 1) + return 0; + + gen4_vertex_finish(sna, FALSE); + sna->render.vertex_index = 0; + + return ARRAY_SIZE(sna->render.vertex_data); +} + +inline static int gen4_get_rectangles(struct sna *sna, + const struct sna_composite_op *op, + int want) +{ + int rem = vertex_space(sna); + + if (rem < 3*op->floats_per_vertex) { + DBG(("flushing vbo for %s: %d < %d\n", + __FUNCTION__, rem, 3*op->floats_per_vertex)); + rem = gen4_get_rectangles__flush(sna); + if (rem == 0) + return 0; + } + + if (!gen4_rectangle_begin(sna, op)) + return 0; + + if (want * op->floats_per_vertex*3 > rem) + want = rem / (3*op->floats_per_vertex); + + sna->render.vertex_index += 3*want; + return want; +} + +static uint32_t *gen4_composite_get_binding_table(struct sna *sna, + const struct sna_composite_op *op, 
+ uint16_t *offset) +{ + uint32_t *table; + + sna->kgem.surface -= + sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); + /* Clear all surplus entries to zero in case of prefetch */ + table = memset(sna->kgem.batch + sna->kgem.surface, + 0, sizeof(struct gen4_surface_state_padded)); + + *offset = sna->kgem.surface; + + DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); + + return table; +} + +static void +gen4_emit_sip(struct sna *sna) +{ + /* Set system instruction pointer */ + OUT_BATCH(GEN4_STATE_SIP | 0); + OUT_BATCH(0); +} + +static void +gen4_emit_urb(struct sna *sna) +{ + int urb_vs_start, urb_vs_size; + int urb_gs_start, urb_gs_size; + int urb_clip_start, urb_clip_size; + int urb_sf_start, urb_sf_size; + int urb_cs_start, urb_cs_size; + + urb_vs_start = 0; + urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; + urb_gs_start = urb_vs_start + urb_vs_size; + urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; + urb_clip_start = urb_gs_start + urb_gs_size; + urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; + urb_sf_start = urb_clip_start + urb_clip_size; + urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; + urb_cs_start = urb_sf_start + urb_sf_size; + urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; + + OUT_BATCH(GEN4_URB_FENCE | + UF0_CS_REALLOC | + UF0_SF_REALLOC | + UF0_CLIP_REALLOC | + UF0_GS_REALLOC | + UF0_VS_REALLOC | + 1); + OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | + ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | + ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); + OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | + ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); + + /* Constant buffer state */ + OUT_BATCH(GEN4_CS_URB_STATE | 0); + OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0); +} + +static void +gen4_emit_state_base_address(struct sna *sna) +{ + assert(sna->render_state.gen4.general_bo->proxy == NULL); + OUT_BATCH(GEN4_STATE_BASE_ADDRESS | 4); + 
OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */ + sna->kgem.nbatch, + sna->render_state.gen4.general_bo, + I915_GEM_DOMAIN_INSTRUCTION << 16, + BASE_ADDRESS_MODIFY)); + OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */ + sna->kgem.nbatch, + NULL, + I915_GEM_DOMAIN_INSTRUCTION << 16, + BASE_ADDRESS_MODIFY)); + OUT_BATCH(0); /* media */ + + /* upper bounds, all disabled */ + OUT_BATCH(BASE_ADDRESS_MODIFY); + OUT_BATCH(0); +} + +static void +gen4_emit_invariant(struct sna *sna) +{ + OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); + if (sna->kgem.gen >= 45) + OUT_BATCH(NEW_PIPELINE_SELECT | PIPELINE_SELECT_3D); + else + OUT_BATCH(GEN4_PIPELINE_SELECT | PIPELINE_SELECT_3D); + + gen4_emit_sip(sna); + gen4_emit_state_base_address(sna); + + sna->render_state.gen4.needs_invariant = FALSE; +} + +static void +gen4_get_batch(struct sna *sna) +{ + kgem_set_mode(&sna->kgem, KGEM_RENDER); + + if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { + DBG(("%s: flushing batch: %d < %d+%d\n", + __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, + 150, 4*8)); + kgem_submit(&sna->kgem); + } + + if (sna->render_state.gen4.needs_invariant) + gen4_emit_invariant(sna); +} + +static void +gen4_align_vertex(struct sna *sna, const struct sna_composite_op *op) +{ + if (op->floats_per_vertex != sna->render_state.gen4.floats_per_vertex) { + DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", + sna->render_state.gen4.floats_per_vertex, + op->floats_per_vertex, + sna->render.vertex_index, + (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex)); + sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; + sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; + sna->render_state.gen4.floats_per_vertex = op->floats_per_vertex; + } +} + +static void +gen4_emit_binding_table(struct sna *sna, uint16_t offset) +{ + if (sna->render_state.gen4.surface_table == offset) + return; + + 
sna->render_state.gen4.surface_table = offset; + + /* Binding table pointers */ + OUT_BATCH(GEN4_3DSTATE_BINDING_TABLE_POINTERS | 4); + OUT_BATCH(0); /* vs */ + OUT_BATCH(0); /* gs */ + OUT_BATCH(0); /* clip */ + OUT_BATCH(0); /* sf */ + /* Only the PS uses the binding table */ + OUT_BATCH(offset*4); +} + +static bool +gen4_emit_pipelined_pointers(struct sna *sna, + const struct sna_composite_op *op, + int blend, int kernel) +{ + uint16_t offset = sna->kgem.nbatch, last; + + OUT_BATCH(GEN4_3DSTATE_PIPELINED_POINTERS | 5); + OUT_BATCH(sna->render_state.gen4.vs); + OUT_BATCH(GEN4_GS_DISABLE); /* passthrough */ + OUT_BATCH(GEN4_CLIP_DISABLE); /* passthrough */ + OUT_BATCH(sna->render_state.gen4.sf[op->mask.bo != NULL]); + OUT_BATCH(sna->render_state.gen4.wm + + SAMPLER_OFFSET(op->src.filter, op->src.repeat, + op->mask.filter, op->mask.repeat, + kernel)); + OUT_BATCH(sna->render_state.gen4.cc + + gen4_get_blend(blend, op->has_component_alpha, op->dst.format)); + + last = sna->render_state.gen4.last_pipelined_pointers; + if (last && + sna->kgem.batch[offset + 1] == sna->kgem.batch[last + 1] && + sna->kgem.batch[offset + 3] == sna->kgem.batch[last + 3] && + sna->kgem.batch[offset + 4] == sna->kgem.batch[last + 4] && + sna->kgem.batch[offset + 5] == sna->kgem.batch[last + 5] && + sna->kgem.batch[offset + 6] == sna->kgem.batch[last + 6]) { + sna->kgem.nbatch = offset; + return false; + } else { + sna->render_state.gen4.last_pipelined_pointers = offset; + return true; + } +} + +static void +gen4_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op) +{ + uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); + uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; + + if (sna->render_state.gen4.drawrect_limit == limit && + sna->render_state.gen4.drawrect_offset == offset) + return; + sna->render_state.gen4.drawrect_offset = offset; + sna->render_state.gen4.drawrect_limit = limit; + + OUT_BATCH(GEN4_3DSTATE_DRAWING_RECTANGLE | (4 - 
2)); + OUT_BATCH(0x00000000); + OUT_BATCH(limit); + OUT_BATCH(offset); +} + +static void +gen4_emit_vertex_elements(struct sna *sna, + const struct sna_composite_op *op) +{ + /* + * vertex data in vertex buffer + * position: (x, y) + * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0) + * texture coordinate 1 if (has_mask is TRUE): same as above + */ + struct gen4_render_state *render = &sna->render_state.gen4; + Bool has_mask = op->mask.bo != NULL; + int nelem = has_mask ? 2 : 1; + int selem; + uint32_t w_component; + uint32_t src_format; + int id = op->u.gen4.ve_id; + + if (render->ve_id == id) + return; + + render->ve_id = id; + + if (op->is_affine) { + src_format = GEN4_SURFACEFORMAT_R32G32_FLOAT; + w_component = GEN4_VFCOMPONENT_STORE_1_FLT; + selem = 2; + } else { + src_format = GEN4_SURFACEFORMAT_R32G32B32_FLOAT; + w_component = GEN4_VFCOMPONENT_STORE_SRC; + selem = 3; + } + + /* The VUE layout + * dword 0-3: position (x, y, 1.0, 1.0), + * dword 4-7: texture coordinate 0 (u0, v0, w0, 1.0) + * [optional] dword 8-11: texture coordinate 1 (u1, v1, w1, 1.0) + */ + OUT_BATCH(GEN4_3DSTATE_VERTEX_ELEMENTS | (2 * (1 + nelem) - 1)); + + /* x,y */ + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + GEN4_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | + 0 << VE0_OFFSET_SHIFT); /* offsets vb in bytes */ + OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | + GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | + GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT | + GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT | + (1*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */ + + /* u0, v0, w0 */ + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + src_format << VE0_FORMAT_SHIFT | + 4 << VE0_OFFSET_SHIFT); /* offset vb in bytes */ + OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | + GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | + w_component << 
VE1_VFCOMPONENT_2_SHIFT | + GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT | + (2*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */ + + /* u1, v1, w1 */ + if (has_mask) { + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + src_format << VE0_FORMAT_SHIFT | + ((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */ + OUT_BATCH(GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | + GEN4_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | + w_component << VE1_VFCOMPONENT_2_SHIFT | + GEN4_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT | + (3*4) << VE1_DESTINATION_ELEMENT_OFFSET_SHIFT); /* VUE offset in dwords */ + } +} + +static void +gen4_emit_state(struct sna *sna, + const struct sna_composite_op *op, + uint16_t wm_binding_table) +{ + gen4_emit_binding_table(sna, wm_binding_table); + if (gen4_emit_pipelined_pointers(sna, op, op->op, op->u.gen4.wm_kernel)) + gen4_emit_urb(sna); + gen4_emit_vertex_elements(sna, op); + gen4_emit_drawing_rectangle(sna, op); +} + +static void +gen4_bind_surfaces(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + + gen4_get_batch(sna); + + binding_table = gen4_composite_get_binding_table(sna, op, &offset); + + binding_table[0] = + gen4_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen4_get_dest_format(op->dst.format), + TRUE); + binding_table[1] = + gen4_bind_bo(sna, + op->src.bo, op->src.width, op->src.height, + op->src.card_format, + FALSE); + if (op->mask.bo) + binding_table[2] = + gen4_bind_bo(sna, + op->mask.bo, + op->mask.width, + op->mask.height, + op->mask.card_format, + FALSE); + + gen4_emit_state(sna, op, offset); +} + +fastcall static void +gen4_render_composite_blt(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", + __FUNCTION__, + r->src.x, r->src.y, 
op->src.offset[0], op->src.offset[1], + r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1], + r->dst.x, r->dst.y, op->dst.x, op->dst.y, + r->width, r->height)); + + if (!gen4_get_rectangles(sna, op, 1)) { + gen4_bind_surfaces(sna, op); + gen4_get_rectangles(sna, op, 1); + } + + op->prim_emit(sna, op, r); + + /* XXX are the shaders fubar? */ + FLUSH(); +} + +static void +gen4_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n", + __FUNCTION__, nbox, op->dst.x, op->dst.y, + op->src.offset[0], op->src.offset[1], + op->src.width, op->src.height, + op->mask.offset[0], op->mask.offset[1], + op->mask.width, op->mask.height)); + + do { + struct sna_composite_rectangles r; + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.mask = r.src = r.dst; + gen4_render_composite_blt(sna, op, &r); + box++; + } while (--nbox); +} + +#ifndef MAX +#define MAX(a,b) ((a) > (b) ? 
(a) : (b)) +#endif + +static uint32_t gen4_bind_video_source(struct sna *sna, + struct kgem_bo *src_bo, + uint32_t src_offset, + int src_width, + int src_height, + int src_pitch, + uint32_t src_surf_format) +{ + struct gen4_surface_state *ss; + + sna->kgem.surface -= sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); + + ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); + ss->ss0.surface_type = GEN4_SURFACE_2D; + ss->ss0.surface_format = src_surf_format; + ss->ss0.color_blend = 1; + + ss->ss1.base_addr = + kgem_add_reloc(&sna->kgem, + sna->kgem.surface + 1, + src_bo, + I915_GEM_DOMAIN_SAMPLER << 16, + src_offset); + + ss->ss2.width = src_width - 1; + ss->ss2.height = src_height - 1; + ss->ss3.pitch = src_pitch - 1; + + return sna->kgem.surface * sizeof(uint32_t); +} + +static void gen4_video_bind_surfaces(struct sna *sna, + const struct sna_composite_op *op, + struct sna_video_frame *frame) +{ + uint32_t src_surf_format; + uint32_t src_surf_base[6]; + int src_width[6]; + int src_height[6]; + int src_pitch[6]; + uint32_t *binding_table; + uint16_t offset; + int n_src, n; + + src_surf_base[0] = frame->YBufOffset; + src_surf_base[1] = frame->YBufOffset; + src_surf_base[2] = frame->VBufOffset; + src_surf_base[3] = frame->VBufOffset; + src_surf_base[4] = frame->UBufOffset; + src_surf_base[5] = frame->UBufOffset; + + if (is_planar_fourcc(frame->id)) { + src_surf_format = GEN4_SURFACEFORMAT_R8_UNORM; + src_width[1] = src_width[0] = frame->width; + src_height[1] = src_height[0] = frame->height; + src_pitch[1] = src_pitch[0] = frame->pitch[1]; + src_width[4] = src_width[5] = src_width[2] = src_width[3] = + frame->width / 2; + src_height[4] = src_height[5] = src_height[2] = src_height[3] = + frame->height / 2; + src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = + frame->pitch[0]; + n_src = 6; + } else { + if (frame->id == FOURCC_UYVY) + src_surf_format = GEN4_SURFACEFORMAT_YCRCB_SWAPY; + else + src_surf_format = 
GEN4_SURFACEFORMAT_YCRCB_NORMAL; + + src_width[0] = frame->width; + src_height[0] = frame->height; + src_pitch[0] = frame->pitch[0]; + n_src = 1; + } + + gen4_get_batch(sna); + + binding_table = gen4_composite_get_binding_table(sna, op, &offset); + + binding_table[0] = + gen4_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen4_get_dest_format(op->dst.format), + TRUE); + for (n = 0; n < n_src; n++) { + binding_table[1+n] = + gen4_bind_video_source(sna, + frame->bo, + src_surf_base[n], + src_width[n], + src_height[n], + src_pitch[n], + src_surf_format); + } + + gen4_emit_state(sna, op, offset); +} + +static Bool +gen4_render_video(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + short src_w, short src_h, + short drw_w, short drw_h, + PixmapPtr pixmap) +{ + struct sna_composite_op tmp; + int nbox, dxo, dyo, pix_xoff, pix_yoff; + float src_scale_x, src_scale_y; + struct sna_pixmap *priv; + BoxPtr box; + + DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, src_w, src_h, drw_w, drw_h)); + + priv = sna_pixmap_force_to_gpu(pixmap); + if (priv == NULL) + return FALSE; + + memset(&tmp, 0, sizeof(tmp)); + + tmp.op = PictOpSrc; + tmp.dst.pixmap = pixmap; + tmp.dst.width = pixmap->drawable.width; + tmp.dst.height = pixmap->drawable.height; + tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth); + tmp.dst.bo = priv->gpu_bo; + + tmp.src.filter = SAMPLER_FILTER_BILINEAR; + tmp.src.repeat = SAMPLER_EXTEND_NONE; + tmp.u.gen4.wm_kernel = + is_planar_fourcc(frame->id) ? 
WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED; + tmp.is_affine = TRUE; + tmp.floats_per_vertex = 3; + tmp.u.gen4.ve_id = 1; + + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, frame->bo)) + kgem_submit(&sna->kgem); + + if (kgem_bo_is_dirty(frame->bo)) + kgem_emit_flush(&sna->kgem); + + gen4_video_bind_surfaces(sna, &tmp, frame); + gen4_align_vertex(sna, &tmp); + + /* Set up the offset for translating from the given region (in screen + * coordinates) to the backing pixmap. + */ +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + + dxo = dstRegion->extents.x1; + dyo = dstRegion->extents.y1; + + /* Use normalized texture coordinates */ + src_scale_x = ((float)src_w / frame->width) / (float)drw_w; + src_scale_y = ((float)src_h / frame->height) / (float)drw_h; + + box = REGION_RECTS(dstRegion); + nbox = REGION_NUM_RECTS(dstRegion); + while (nbox--) { + BoxRec r; + + r.x1 = box->x1 + pix_xoff; + r.x2 = box->x2 + pix_xoff; + r.y1 = box->y1 + pix_yoff; + r.y2 = box->y2 + pix_yoff; + + if (!gen4_get_rectangles(sna, &tmp, 1)) { + gen4_video_bind_surfaces(sna, &tmp, frame); + gen4_get_rectangles(sna, &tmp, 1); + } + + OUT_VERTEX(r.x2, r.y2); + OUT_VERTEX_F((box->x2 - dxo) * src_scale_x); + OUT_VERTEX_F((box->y2 - dyo) * src_scale_y); + + OUT_VERTEX(r.x1, r.y2); + OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); + OUT_VERTEX_F((box->y2 - dyo) * src_scale_y); + + OUT_VERTEX(r.x1, r.y1); + OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); + OUT_VERTEX_F((box->y1 - dyo) * src_scale_y); + + FLUSH(); + + sna_damage_add_box(&priv->gpu_damage, &r); + sna_damage_subtract_box(&priv->cpu_damage, &r); + box++; + } + + return TRUE; +} + +static Bool +gen4_composite_solid_init(struct sna *sna, + struct sna_composite_channel *channel, + uint32_t color) +{ + channel->filter = PictFilterNearest; + channel->repeat = 
RepeatNormal; + channel->is_affine = TRUE; + channel->is_solid = TRUE; + channel->transform = NULL; + channel->width = 1; + channel->height = 1; + channel->card_format = GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; + + channel->bo = sna_render_get_solid(sna, color); + + channel->scale[0] = channel->scale[1] = 1; + channel->offset[0] = channel->offset[1] = 0; + return channel->bo != NULL; +} + +static int +gen4_composite_picture(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int x, int y, + int w, int h, + int dst_x, int dst_y) +{ + PixmapPtr pixmap; + uint32_t color; + int16_t dx, dy; + + DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", + __FUNCTION__, x, y, w, h, dst_x, dst_y)); + + channel->is_solid = FALSE; + channel->card_format = -1; + + if (sna_picture_is_solid(picture, &color)) + return gen4_composite_solid_init(sna, channel, color); + + if (picture->pDrawable == NULL) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + if (!gen4_check_repeat(picture)) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + if (!gen4_check_filter(picture)) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + channel->repeat = picture->repeat ? 
picture->repeatType : RepeatNone; + channel->filter = picture->filter; + + pixmap = get_drawable_pixmap(picture->pDrawable); + get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); + + x += dx + picture->pDrawable->x; + y += dy + picture->pDrawable->y; + + channel->is_affine = sna_transform_is_affine(picture->transform); + if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) { + DBG(("%s: integer translation (%d, %d), removing\n", + __FUNCTION__, dx, dy)); + x += dx; + y += dy; + channel->transform = NULL; + channel->filter = PictFilterNearest; + } else + channel->transform = picture->transform; + + channel->card_format = gen4_get_card_format(picture->format); + if (channel->card_format == -1) + return sna_render_picture_convert(sna, picture, channel, pixmap, + x, y, w, h, dst_x, dst_y); + + if (pixmap->drawable.width > 8192 || pixmap->drawable.height > 8192) + return sna_render_picture_extract(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + return sna_render_pixmap_bo(sna, channel, pixmap, + x, y, w, h, dst_x, dst_y); +} + +static void gen4_composite_channel_convert(struct sna_composite_channel *channel) +{ + channel->repeat = gen4_repeat(channel->repeat); + channel->filter = gen4_filter(channel->filter); + if (channel->card_format == -1) + channel->card_format = gen4_get_card_format(channel->pict_format); +} + +static void +gen4_render_composite_done(struct sna *sna, + const struct sna_composite_op *op) +{ + gen4_vertex_flush(sna); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + sna->render.op = NULL; + + DBG(("%s()\n", __FUNCTION__)); + + sna_render_composite_redirect_done(sna, op); + + if (op->src.bo) + kgem_bo_destroy(&sna->kgem, op->src.bo); + if (op->mask.bo) + kgem_bo_destroy(&sna->kgem, op->mask.bo); +} + +static Bool +gen4_composite_set_target(struct sna *sna, + PicturePtr dst, + struct sna_composite_op *op) +{ + struct sna_pixmap *priv; + + if (!gen4_check_dst_format(dst->format)) { + DBG(("%s: incompatible render target 
format %08x\n", + __FUNCTION__, dst->format)); + return FALSE; + } + + op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); + op->dst.width = op->dst.pixmap->drawable.width; + op->dst.height = op->dst.pixmap->drawable.height; + op->dst.format = dst->format; + priv = sna_pixmap_force_to_gpu(op->dst.pixmap); + if (priv == NULL) + return FALSE; + + op->dst.bo = priv->gpu_bo; + if (!priv->gpu_only) + op->damage = &priv->gpu_damage; + + get_drawable_deltas(dst->pDrawable, op->dst.pixmap, + &op->dst.x, &op->dst.y); + return TRUE; +} + +static inline Bool +picture_is_cpu(PicturePtr picture) +{ + if (!picture->pDrawable) + return FALSE; + + /* If it is a solid, try to use the render paths */ + if (picture->pDrawable->width == 1 && + picture->pDrawable->height == 1 && + picture->repeat) + return FALSE; + + return is_cpu(picture->pDrawable); +} + +#if PREFER_BLT +static inline bool prefer_blt(struct sna *sna) +{ + return true; +} +#else +static inline bool prefer_blt(struct sna *sna) +{ + return sna->kgem.mode != KGEM_RENDER; +} +#endif + +static Bool +try_blt(struct sna *sna, + PicturePtr dst, + PicturePtr source, + int width, int height) +{ + if (prefer_blt(sna)) { + DBG(("%s: already performing BLT\n", __FUNCTION__)); + return TRUE; + } + + if (width > 8192 || height > 8192) { + DBG(("%s: operation too large for 3D pipe (%d, %d)\n", + __FUNCTION__, width, height)); + return TRUE; + } + + /* is the source picture only in cpu memory e.g. a shm pixmap? 
*/ + return picture_is_cpu(source); +} + +static Bool +gen4_render_composite(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp) +{ + DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, + width, height, sna->kgem.mode)); + + if (mask == NULL && + try_blt(sna, dst, src, width, height) && + sna_blt_composite(sna, op, + src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, tmp)) + return TRUE; + + if (op >= ARRAY_SIZE(gen4_blend_op)) + return FALSE; + + if (need_tiling(sna, width, height)) + return sna_tiling_composite(sna, + op, src, mask, dst, + src_x, src_y, + msk_x, msk_y, + dst_x, dst_y, + width, height, + tmp); + + if (!gen4_composite_set_target(sna, dst, tmp)) + return FALSE; + + if (tmp->dst.width > 8192 || tmp->dst.height > 8192) { + if (!sna_render_composite_redirect(sna, tmp, + dst_x, dst_y, width, height)) + return FALSE; + } + + switch (gen4_composite_picture(sna, src, &tmp->src, + src_x, src_y, + width, height, + dst_x, dst_y)) { + case -1: + DBG(("%s: failed to prepare source\n", __FUNCTION__)); + goto cleanup_dst; + case 0: + gen4_composite_solid_init(sna, &tmp->src, 0); + case 1: + gen4_composite_channel_convert(&tmp->src); + break; + } + + tmp->op = op; + tmp->is_affine = tmp->src.is_affine; + tmp->has_component_alpha = FALSE; + tmp->need_magic_ca_pass = FALSE; + + tmp->prim_emit = gen4_emit_composite_primitive; + if (mask) { + if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { + tmp->has_component_alpha = TRUE; + + /* Check if it's component alpha that relies on a source alpha and on + * the source value. We can only get one of those into the single + * source value that we get to blend with. 
+ */ + if (gen4_blend_op[op].src_alpha && + (gen4_blend_op[op].src_blend != GEN4_BLENDFACTOR_ZERO)) { + if (op != PictOpOver) { + DBG(("%s -- fallback: unhandled component alpha blend\n", + __FUNCTION__)); + + goto cleanup_src; + } + + tmp->need_magic_ca_pass = TRUE; + tmp->op = PictOpOutReverse; + } + } + + switch (gen4_composite_picture(sna, mask, &tmp->mask, + msk_x, msk_y, + width, height, + dst_x, dst_y)) { + case -1: + DBG(("%s: failed to prepare mask\n", __FUNCTION__)); + goto cleanup_src; + case 0: + gen4_composite_solid_init(sna, &tmp->mask, 0); + case 1: + gen4_composite_channel_convert(&tmp->mask); + break; + } + + tmp->is_affine &= tmp->mask.is_affine; + + if (tmp->src.transform == NULL && tmp->mask.transform == NULL) + tmp->prim_emit = gen4_emit_composite_primitive_identity_source_mask; + + tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine; + } else { + if (tmp->src.is_solid) + tmp->prim_emit = gen4_emit_composite_primitive_solid; + else if (tmp->src.transform == NULL) + tmp->prim_emit = gen4_emit_composite_primitive_identity_source; + else if (tmp->src.is_affine) + tmp->prim_emit = gen4_emit_composite_primitive_affine_source; + + tmp->mask.filter = SAMPLER_FILTER_NEAREST; + tmp->mask.repeat = SAMPLER_EXTEND_NONE; + + tmp->floats_per_vertex = 3 + !tmp->is_affine; + } + + tmp->u.gen4.wm_kernel = + gen4_choose_composite_kernel(tmp->op, + tmp->mask.bo != NULL, + tmp->has_component_alpha, + tmp->is_affine); + tmp->u.gen4.ve_id = (tmp->mask.bo != NULL) << 1 | tmp->is_affine; + + tmp->blt = gen4_render_composite_blt; + tmp->boxes = gen4_render_composite_boxes; + tmp->done = gen4_render_composite_done; + + if (!kgem_check_bo(&sna->kgem, tmp->dst.bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, tmp->src.bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, tmp->mask.bo)) + kgem_submit(&sna->kgem); + + if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo)) + kgem_emit_flush(&sna->kgem); + + gen4_bind_surfaces(sna, tmp); + 
gen4_align_vertex(sna, tmp); + + sna->render.op = tmp; + return TRUE; + +cleanup_src: + if (tmp->src.bo) + kgem_bo_destroy(&sna->kgem, tmp->src.bo); +cleanup_dst: + if (tmp->redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->dst.bo); + return FALSE; +} + +static uint32_t gen4_get_dest_format_for_depth(int depth) +{ + switch (depth) { + case 32: + case 24: + default: return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; + case 30: return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM; + case 16: return GEN4_SURFACEFORMAT_B5G6R5_UNORM; + case 8: return GEN4_SURFACEFORMAT_A8_UNORM; + } +} + +static uint32_t gen4_get_card_format_for_depth(int depth) +{ + switch (depth) { + case 32: + default: return GEN4_SURFACEFORMAT_B8G8R8A8_UNORM; + case 30: return GEN4_SURFACEFORMAT_B10G10R10A2_UNORM; + case 24: return GEN4_SURFACEFORMAT_B8G8R8X8_UNORM; + case 16: return GEN4_SURFACEFORMAT_B5G6R5_UNORM; + case 8: return GEN4_SURFACEFORMAT_A8_UNORM; + } +} + +static void +gen4_copy_bind_surfaces(struct sna *sna, const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + + gen4_get_batch(sna); + + binding_table = gen4_composite_get_binding_table(sna, op, &offset); + + binding_table[0] = + gen4_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen4_get_dest_format_for_depth(op->dst.pixmap->drawable.depth), + TRUE); + binding_table[1] = + gen4_bind_bo(sna, + op->src.bo, op->src.width, op->src.height, + op->src.card_format, + FALSE); + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == *(uint64_t*)binding_table) { + sna->kgem.surface += sizeof(struct gen4_surface_state_padded) / sizeof(uint32_t); + offset = sna->render_state.gen4.surface_table; + } + + gen4_emit_state(sna, op, offset); +} + +static void +gen4_render_copy_one(struct sna *sna, + const struct sna_composite_op *op, + int sx, int sy, + int w, int h, + int dx, int dy) +{ + if (!gen4_get_rectangles(sna, op, 1)) { + gen4_copy_bind_surfaces(sna, op); + 
gen4_get_rectangles(sna, op, 1); + } + + OUT_VERTEX(dx+w, dy+h); + OUT_VERTEX_F((sx+w)*op->src.scale[0]); + OUT_VERTEX_F((sy+h)*op->src.scale[1]); + + OUT_VERTEX(dx, dy+h); + OUT_VERTEX_F(sx*op->src.scale[0]); + OUT_VERTEX_F((sy+h)*op->src.scale[1]); + + OUT_VERTEX(dx, dy); + OUT_VERTEX_F(sx*op->src.scale[0]); + OUT_VERTEX_F(sy*op->src.scale[1]); + + FLUSH(); +} + +static Bool +gen4_render_copy_boxes(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n) +{ + struct sna_composite_op tmp; + + if (prefer_blt(sna) && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n)) + return TRUE; + + if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || + src->drawable.width > 8192 || src->drawable.height > 8192 || + dst->drawable.width > 8192 || dst->drawable.height > 8192) + return sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n); + + DBG(("%s (%d, %d)->(%d, %d) x %d\n", + __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n)); + + memset(&tmp, 0, sizeof(tmp)); + + tmp.op = alu == GXcopy ? 
PictOpSrc : PictOpClear; + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp.dst.bo = dst_bo; + + tmp.src.bo = src_bo; + tmp.src.filter = SAMPLER_FILTER_NEAREST; + tmp.src.repeat = SAMPLER_EXTEND_NONE; + tmp.src.card_format = + gen4_get_card_format_for_depth(src->drawable.depth), + tmp.src.width = src->drawable.width; + tmp.src.height = src->drawable.height; + + tmp.is_affine = TRUE; + tmp.floats_per_vertex = 3; + tmp.u.gen4.wm_kernel = WM_KERNEL; + tmp.u.gen4.ve_id = 1; + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, src_bo)) + kgem_submit(&sna->kgem); + + if (kgem_bo_is_dirty(src_bo)) + kgem_emit_flush(&sna->kgem); + + gen4_copy_bind_surfaces(sna, &tmp); + gen4_align_vertex(sna, &tmp); + + tmp.src.scale[0] = 1. / src->drawable.width; + tmp.src.scale[1] = 1. / src->drawable.height; + do { + gen4_render_copy_one(sna, &tmp, + box->x1 + src_dx, box->y1 + src_dy, + box->x2 - box->x1, box->y2 - box->y1, + box->x1 + dst_dx, box->y1 + dst_dy); + box++; + } while (--n); + + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + return TRUE; +} + +static void +gen4_render_copy_blt(struct sna *sna, + const struct sna_copy_op *op, + int16_t sx, int16_t sy, + int16_t w, int16_t h, + int16_t dx, int16_t dy) +{ + gen4_render_copy_one(sna, &op->base, sx, sy, w, h, dx, dy); +} + +static void +gen4_render_copy_done(struct sna *sna, const struct sna_copy_op *op) +{ + gen4_vertex_flush(sna); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); +} + +static Bool +gen4_render_copy(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + struct sna_copy_op *op) +{ + if (prefer_blt(sna) && + sna_blt_copy(sna, alu, + src_bo, dst_bo, + dst->drawable.bitsPerPixel, + op)) + return TRUE; + + if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || + src->drawable.width > 8192 
|| src->drawable.height > 8192 || + dst->drawable.width > 8192 || dst->drawable.height > 8192) + return sna_blt_copy(sna, alu, src_bo, dst_bo, + dst->drawable.bitsPerPixel, + op); + + op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear; + + op->base.dst.pixmap = dst; + op->base.dst.width = dst->drawable.width; + op->base.dst.height = dst->drawable.height; + op->base.dst.format = sna_format_for_depth(dst->drawable.depth); + op->base.dst.bo = dst_bo; + + op->base.src.bo = src_bo; + op->base.src.card_format = + gen4_get_card_format_for_depth(src->drawable.depth), + op->base.src.width = src->drawable.width; + op->base.src.height = src->drawable.height; + op->base.src.scale[0] = 1./src->drawable.width; + op->base.src.scale[1] = 1./src->drawable.height; + op->base.src.filter = SAMPLER_FILTER_NEAREST; + op->base.src.repeat = SAMPLER_EXTEND_NONE; + + op->base.is_affine = true; + op->base.floats_per_vertex = 3; + op->base.u.gen4.wm_kernel = WM_KERNEL; + op->base.u.gen4.ve_id = 1; + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, src_bo)) + kgem_submit(&sna->kgem); + + if (kgem_bo_is_dirty(src_bo)) + kgem_emit_flush(&sna->kgem); + + gen4_copy_bind_surfaces(sna, &op->base); + gen4_align_vertex(sna, &op->base); + + op->blt = gen4_render_copy_blt; + op->done = gen4_render_copy_done; + return TRUE; +} + +static void +gen4_fill_bind_surfaces(struct sna *sna, const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + + gen4_get_batch(sna); + + binding_table = gen4_composite_get_binding_table(sna, op, &offset); + + binding_table[0] = + gen4_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen4_get_dest_format_for_depth(op->dst.pixmap->drawable.depth), + TRUE); + binding_table[1] = + gen4_bind_bo(sna, + op->src.bo, 1, 1, + GEN4_SURFACEFORMAT_B8G8R8A8_UNORM, + FALSE); + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen4.surface_table) == 
*(uint64_t*)binding_table) { + sna->kgem.surface += + sizeof(struct gen4_surface_state_padded)/sizeof(uint32_t); + offset = sna->render_state.gen4.surface_table; + } + + gen4_emit_state(sna, op, offset); +} + +static void +gen4_render_fill_one(struct sna *sna, + const struct sna_composite_op *op, + int x, int y, int w, int h) +{ + if (!gen4_get_rectangles(sna, op, 1)) { + gen4_fill_bind_surfaces(sna, op); + gen4_get_rectangles(sna, op, 1); + } + + OUT_VERTEX(x+w, y+h); + OUT_VERTEX_F(1); + OUT_VERTEX_F(1); + + OUT_VERTEX(x, y+h); + OUT_VERTEX_F(0); + OUT_VERTEX_F(1); + + OUT_VERTEX(x, y); + OUT_VERTEX_F(0); + OUT_VERTEX_F(0); + + FLUSH(); +} + +static Bool +gen4_render_fill_boxes(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n) +{ + struct sna_composite_op tmp; + uint32_t pixel; + + if (op >= ARRAY_SIZE(gen4_blend_op)) { + DBG(("%s: fallback due to unhandled blend op: %d\n", + __FUNCTION__, op)); + return FALSE; + } + + if (prefer_blt(sna) || + dst->drawable.width > 8192 || + dst->drawable.height > 8192 || + !gen4_check_dst_format(format)) { + uint8_t alu = GXcopy; + + if (op == PictOpClear) { + alu = GXclear; + pixel = 0; + op = PictOpSrc; + } + + if (op == PictOpOver && color->alpha >= 0xff00) + op = PictOpSrc; + + if (op == PictOpSrc && + sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + format) && + sna_blt_fill_boxes(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + pixel, box, n)) + return TRUE; + + if (dst->drawable.width > 8192 || + dst->drawable.height > 8192 || + !gen4_check_dst_format(format)) + return FALSE; + } + + if (!sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + PICT_a8r8g8b8)) + return FALSE; + + DBG(("%s(%08x x %d)\n", __FUNCTION__, pixel, n)); + + memset(&tmp, 0, sizeof(tmp)); + + tmp.op = op; + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; 
+ tmp.dst.height = dst->drawable.height; + tmp.dst.format = format; + tmp.dst.bo = dst_bo; + + tmp.src.bo = sna_render_get_solid(sna, pixel); + tmp.src.filter = SAMPLER_FILTER_NEAREST; + tmp.src.repeat = SAMPLER_EXTEND_REPEAT; + + tmp.is_affine = TRUE; + tmp.floats_per_vertex = 3; + tmp.u.gen4.wm_kernel = WM_KERNEL; + tmp.u.gen4.ve_id = 1; + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + + gen4_fill_bind_surfaces(sna, &tmp); + gen4_align_vertex(sna, &tmp); + + do { + gen4_render_fill_one(sna, &tmp, + box->x1, box->y1, + box->x2 - box->x1, box->y2 - box->y1); + box++; + } while (--n); + + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + return TRUE; +} + +static void +gen4_render_fill_blt(struct sna *sna, const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h) +{ + gen4_render_fill_one(sna, &op->base, x, y, w, h); +} + +static void +gen4_render_fill_done(struct sna *sna, const struct sna_fill_op *op) +{ + gen4_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, op->base.src.bo); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); +} + +static Bool +gen4_render_fill(struct sna *sna, uint8_t alu, + PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + struct sna_fill_op *op) +{ + if (prefer_blt(sna) && + sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + op)) + return TRUE; + + if (!(alu == GXcopy || alu == GXclear) || + dst->drawable.width > 8192 || dst->drawable.height > 8192) + return sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + op); + + if (alu == GXclear) + color = 0; + + op->base.op = color == 0 ? 
PictOpClear : PictOpSrc; + + op->base.dst.pixmap = dst; + op->base.dst.width = dst->drawable.width; + op->base.dst.height = dst->drawable.height; + op->base.dst.format = sna_format_for_depth(dst->drawable.depth); + op->base.dst.bo = dst_bo; + + op->base.src.bo = + sna_render_get_solid(sna, + sna_rgba_for_color(color, + dst->drawable.depth)); + op->base.src.filter = SAMPLER_FILTER_NEAREST; + op->base.src.repeat = SAMPLER_EXTEND_REPEAT; + + op->base.is_affine = TRUE; + op->base.floats_per_vertex = 3; + op->base.u.gen4.wm_kernel = WM_KERNEL; + op->base.u.gen4.ve_id = 1; + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + + gen4_fill_bind_surfaces(sna, &op->base); + gen4_align_vertex(sna, &op->base); + + op->blt = gen4_render_fill_blt; + op->done = gen4_render_fill_done; + return TRUE; +} + +static void +gen4_render_flush(struct sna *sna) +{ + gen4_vertex_finish(sna, TRUE); +} + +static void +gen4_render_context_switch(struct sna *sna, + int new_mode) +{ + if (sna->kgem.mode == 0) + return; + + if (new_mode == KGEM_BLT) { +#if 0 + OUT_BATCH(MI_FLUSH | + MI_STATE_INSTRUCTION_CACHE_FLUSH | + MI_INHIBIT_RENDER_CACHE_FLUSH); +#endif + } +} + +static void gen4_render_reset(struct sna *sna) +{ + sna->render_state.gen4.needs_invariant = TRUE; + sna->render_state.gen4.vb_id = 0; + sna->render_state.gen4.ve_id = -1; + sna->render_state.gen4.last_primitive = -1; + sna->render_state.gen4.last_pipelined_pointers = 0; + + sna->render_state.gen4.drawrect_offset = -1; + sna->render_state.gen4.drawrect_limit = -1; + sna->render_state.gen4.surface_table = -1; +} + +static void gen4_render_fini(struct sna *sna) +{ + kgem_bo_destroy(&sna->kgem, sna->render_state.gen4.general_bo); +} + +static uint32_t gen4_create_vs_unit_state(struct sna_static_stream *stream) +{ + struct gen4_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32); + + /* Set up the vertex shader to be disabled (passthrough) */ + vs->thread4.nr_urb_entries = URB_VS_ENTRIES; + 
vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; + vs->vs6.vs_enable = 0; + vs->vs6.vert_cache_disable = 1; + + return sna_static_stream_offsetof(stream, vs); +} + +static uint32_t gen4_create_sf_state(struct sna_static_stream *stream, + uint32_t kernel) +{ + struct gen4_sf_unit_state *sf_state; + + sf_state = sna_static_stream_map(stream, sizeof(*sf_state), 32); + + sf_state->thread0.grf_reg_count = GEN4_GRF_BLOCKS(SF_KERNEL_NUM_GRF); + sf_state->thread0.kernel_start_pointer = kernel >> 6; + sf_state->sf1.single_program_flow = 1; + /* scratch space is not used in our kernel */ + sf_state->thread2.scratch_space_base_pointer = 0; + sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */ + sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ + sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ + /* don't smash vertex header, read start from dw8 */ + sf_state->thread3.urb_entry_read_offset = 1; + sf_state->thread3.dispatch_grf_start_reg = 3; + sf_state->thread4.max_threads = SF_MAX_THREADS - 1; + sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; + sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES; + sf_state->thread4.stats_enable = 1; + sf_state->sf5.viewport_transform = FALSE; /* skip viewport */ + sf_state->sf6.cull_mode = GEN4_CULLMODE_NONE; + sf_state->sf6.scissor = 0; + sf_state->sf7.trifan_pv = 2; + sf_state->sf6.dest_org_vbias = 0x8; + sf_state->sf6.dest_org_hbias = 0x8; + + return sna_static_stream_offsetof(stream, sf_state); +} + +static uint32_t gen4_create_sampler_state(struct sna_static_stream *stream, + sampler_filter_t src_filter, + sampler_extend_t src_extend, + sampler_filter_t mask_filter, + sampler_extend_t mask_extend) +{ + struct gen4_sampler_state *sampler_state; + + sampler_state = sna_static_stream_map(stream, + sizeof(struct gen4_sampler_state) * 2, + 32); + sampler_state_init(&sampler_state[0], src_filter, src_extend); + sampler_state_init(&sampler_state[1], 
mask_filter, mask_extend); + + return sna_static_stream_offsetof(stream, sampler_state); +} + +static void gen4_init_wm_state(struct gen4_wm_unit_state *state, + Bool has_mask, + uint32_t kernel, + uint32_t sampler) +{ + state->thread0.grf_reg_count = GEN4_GRF_BLOCKS(PS_KERNEL_NUM_GRF); + state->thread0.kernel_start_pointer = kernel >> 6; + + state->thread1.single_program_flow = 0; + + /* scratch space is not used in our kernel */ + state->thread2.scratch_space_base_pointer = 0; + state->thread2.per_thread_scratch_space = 0; + + state->thread3.const_urb_entry_read_length = 0; + state->thread3.const_urb_entry_read_offset = 0; + + state->thread3.urb_entry_read_offset = 0; + /* wm kernel use urb from 3, see wm_program in compiler module */ + state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ + + state->wm4.sampler_count = 1; /* 1-4 samplers */ + + state->wm4.sampler_state_pointer = sampler >> 5; + state->wm5.max_threads = PS_MAX_THREADS - 1; + state->wm5.transposed_urb_read = 0; + state->wm5.thread_dispatch_enable = 1; + /* just use 16-pixel dispatch (4 subspans), don't need to change kernel + * start point + */ + state->wm5.enable_16_pix = 1; + state->wm5.enable_8_pix = 0; + state->wm5.early_depth_test = 1; + + /* Each pair of attributes (src/mask coords) is two URB entries */ + if (has_mask) { + state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ + state->thread3.urb_entry_read_length = 4; + } else { + state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ + state->thread3.urb_entry_read_length = 2; + } +} + +static uint32_t gen4_create_cc_viewport(struct sna_static_stream *stream) +{ + struct gen4_cc_viewport vp; + + vp.min_depth = -1.e35; + vp.max_depth = 1.e35; + + return sna_static_stream_add(stream, &vp, sizeof(vp), 32); +} + +static uint32_t gen4_create_cc_unit_state(struct sna_static_stream *stream) +{ + uint8_t *ptr, *base; + uint32_t vp; + int i, j; + + vp = gen4_create_cc_viewport(stream); + base = ptr = + 
sna_static_stream_map(stream, + GEN4_BLENDFACTOR_COUNT*GEN4_BLENDFACTOR_COUNT*64, + 64); + + for (i = 0; i < GEN4_BLENDFACTOR_COUNT; i++) { + for (j = 0; j < GEN4_BLENDFACTOR_COUNT; j++) { + struct gen4_cc_unit_state *state = + (struct gen4_cc_unit_state *)ptr; + + state->cc3.blend_enable = 1; /* enable color blend */ + state->cc4.cc_viewport_state_offset = vp >> 5; + + state->cc5.logicop_func = 0xc; /* COPY */ + state->cc5.ia_blend_function = GEN4_BLENDFUNCTION_ADD; + + /* Fill in alpha blend factors same as color, for the future. */ + state->cc5.ia_src_blend_factor = i; + state->cc5.ia_dest_blend_factor = j; + + state->cc6.blend_function = GEN4_BLENDFUNCTION_ADD; + state->cc6.clamp_post_alpha_blend = 1; + state->cc6.clamp_pre_alpha_blend = 1; + state->cc6.src_blend_factor = i; + state->cc6.dest_blend_factor = j; + + ptr += 64; + } + } + + return sna_static_stream_offsetof(stream, base); +} + +static Bool gen4_render_setup(struct sna *sna) +{ + struct gen4_render_state *state = &sna->render_state.gen4; + struct sna_static_stream general; + struct gen4_wm_unit_state_padded *wm_state; + uint32_t sf[2], wm[KERNEL_COUNT]; + int i, j, k, l, m; + + sna_static_stream_init(&general); + + /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer + * dumps, you know it points to zero. + */ + null_create(&general); + + /* Set up the two SF states (one for blending with a mask, one without) */ + sf[0] = sna_static_stream_add(&general, + sf_kernel, + sizeof(sf_kernel), + 64); + sf[1] = sna_static_stream_add(&general, + sf_kernel_mask, + sizeof(sf_kernel_mask), + 64); + for (m = 0; m < KERNEL_COUNT; m++) { + wm[m] = sna_static_stream_add(&general, + wm_kernels[m].data, + wm_kernels[m].size, + 64); + } + + state->vs = gen4_create_vs_unit_state(&general); + + state->sf[0] = gen4_create_sf_state(&general, sf[0]); + state->sf[1] = gen4_create_sf_state(&general, sf[1]); + + + /* Set up the WM states: each filter/extend type for source and mask, per + * kernel. 
+ */ + wm_state = sna_static_stream_map(&general, + sizeof(*wm_state) * KERNEL_COUNT * + FILTER_COUNT * EXTEND_COUNT * + FILTER_COUNT * EXTEND_COUNT, + 64); + state->wm = sna_static_stream_offsetof(&general, wm_state); + for (i = 0; i < FILTER_COUNT; i++) { + for (j = 0; j < EXTEND_COUNT; j++) { + for (k = 0; k < FILTER_COUNT; k++) { + for (l = 0; l < EXTEND_COUNT; l++) { + uint32_t sampler_state; + + sampler_state = + gen4_create_sampler_state(&general, + i, j, + k, l); + + for (m = 0; m < KERNEL_COUNT; m++) { + gen4_init_wm_state(&wm_state->state, + wm_kernels[m].has_mask, + wm[m], + sampler_state); + wm_state++; + } + } + } + } + } + + state->cc = gen4_create_cc_unit_state(&general); + + state->general_bo = sna_static_stream_fini(sna, &general); + return state->general_bo != NULL; +} + +Bool gen4_render_init(struct sna *sna) +{ + if (!gen4_render_setup(sna)) + return FALSE; + + gen4_render_reset(sna); + + sna->render.composite = gen4_render_composite; + sna->render.video = gen4_render_video; + + sna->render.copy_boxes = gen4_render_copy_boxes; + sna->render.copy = gen4_render_copy; + + sna->render.fill_boxes = gen4_render_fill_boxes; + sna->render.fill = gen4_render_fill; + + sna->render.flush = gen4_render_flush; + sna->render.context_switch = gen4_render_context_switch; + sna->render.reset = gen4_render_reset; + sna->render.fini = gen4_render_fini; + + sna->render.max_3d_size = 8192; + return TRUE; +} diff --git a/src/sna/gen4_render.h b/src/sna/gen4_render.h new file mode 100644 index 00000000..a014e52f --- /dev/null +++ b/src/sna/gen4_render.h @@ -0,0 +1,2643 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef GEN5_RENDER_H +#define GEN5_RENDER_H + +#define GEN4_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \ + ((Pipeline) << 27) | \ + ((Opcode) << 24) | \ + ((Subopcode) << 16)) + +#define GEN4_URB_FENCE GEN4_3D(0, 0, 0) +#define GEN4_CS_URB_STATE GEN4_3D(0, 0, 1) +#define GEN4_CONSTANT_BUFFER GEN4_3D(0, 0, 2) +#define GEN4_STATE_PREFETCH GEN4_3D(0, 0, 3) + +#define GEN4_STATE_BASE_ADDRESS GEN4_3D(0, 1, 1) +#define GEN4_STATE_SIP GEN4_3D(0, 1, 2) +#define GEN4_PIPELINE_SELECT GEN4_3D(0, 1, 4) + +#define NEW_PIPELINE_SELECT GEN4_3D(1, 1, 4) + +#define GEN4_MEDIA_STATE_POINTERS GEN4_3D(2, 0, 0) +#define GEN4_MEDIA_OBJECT GEN4_3D(2, 1, 0) + +#define GEN4_3DSTATE_PIPELINED_POINTERS GEN4_3D(3, 0, 0) +#define GEN4_3DSTATE_BINDING_TABLE_POINTERS GEN4_3D(3, 0, 1) + +#define GEN4_3DSTATE_VERTEX_BUFFERS GEN4_3D(3, 0, 8) +#define GEN4_3DSTATE_VERTEX_ELEMENTS GEN4_3D(3, 0, 9) +#define GEN4_3DSTATE_INDEX_BUFFER GEN4_3D(3, 0, 0xa) +#define GEN4_3DSTATE_VF_STATISTICS GEN4_3D(3, 0, 0xb) + +#define GEN4_3DSTATE_DRAWING_RECTANGLE GEN4_3D(3, 1, 0) +#define GEN4_3DSTATE_CONSTANT_COLOR GEN4_3D(3, 1, 1) +#define GEN4_3DSTATE_SAMPLER_PALETTE_LOAD GEN4_3D(3, 1, 2) +#define GEN4_3DSTATE_CHROMA_KEY GEN4_3D(3, 1, 4) +#define GEN4_3DSTATE_DEPTH_BUFFER GEN4_3D(3, 1, 5) +# define GEN4_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29 +# define GEN4_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18 + +#define GEN4_3DSTATE_POLY_STIPPLE_OFFSET GEN4_3D(3, 1, 6) +#define GEN4_3DSTATE_POLY_STIPPLE_PATTERN GEN4_3D(3, 1, 7) +#define GEN4_3DSTATE_LINE_STIPPLE GEN4_3D(3, 1, 8) +#define GEN4_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN4_3D(3, 1, 9) +/* These two are BLC and CTG only, not BW or CL */ +#define GEN4_3DSTATE_AA_LINE_PARAMS GEN4_3D(3, 1, 0xa) +#define GEN4_3DSTATE_GS_SVB_INDEX GEN4_3D(3, 1, 0xb) + +#define GEN4_PIPE_CONTROL GEN4_3D(3, 2, 0) + +#define GEN4_3DPRIMITIVE GEN4_3D(3, 3, 0) + +#define GEN4_3DSTATE_CLEAR_PARAMS GEN4_3D(3, 1, 0x10) +/* 
DW1 */ +# define GEN4_3DSTATE_DEPTH_CLEAR_VALID (1 << 15) + +#define PIPELINE_SELECT_3D 0 +#define PIPELINE_SELECT_MEDIA 1 + +#define UF0_CS_REALLOC (1 << 13) +#define UF0_VFE_REALLOC (1 << 12) +#define UF0_SF_REALLOC (1 << 11) +#define UF0_CLIP_REALLOC (1 << 10) +#define UF0_GS_REALLOC (1 << 9) +#define UF0_VS_REALLOC (1 << 8) +#define UF1_CLIP_FENCE_SHIFT 20 +#define UF1_GS_FENCE_SHIFT 10 +#define UF1_VS_FENCE_SHIFT 0 +#define UF2_CS_FENCE_SHIFT 20 +#define UF2_VFE_FENCE_SHIFT 10 +#define UF2_SF_FENCE_SHIFT 0 + +/* for GEN4_STATE_BASE_ADDRESS */ +#define BASE_ADDRESS_MODIFY (1 << 0) + +/* for GEN4_3DSTATE_PIPELINED_POINTERS */ +#define GEN4_GS_DISABLE 0 +#define GEN4_GS_ENABLE 1 +#define GEN4_CLIP_DISABLE 0 +#define GEN4_CLIP_ENABLE 1 + +/* for GEN4_PIPE_CONTROL */ +#define GEN4_PIPE_CONTROL_NOWRITE (0 << 14) +#define GEN4_PIPE_CONTROL_WRITE_QWORD (1 << 14) +#define GEN4_PIPE_CONTROL_WRITE_DEPTH (2 << 14) +#define GEN4_PIPE_CONTROL_WRITE_TIME (3 << 14) +#define GEN4_PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define GEN4_PIPE_CONTROL_WC_FLUSH (1 << 12) +#define GEN4_PIPE_CONTROL_IS_FLUSH (1 << 11) +#define GEN4_PIPE_CONTROL_TC_FLUSH (1 << 10) +#define GEN4_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define GEN4_PIPE_CONTROL_GLOBAL_GTT (1 << 2) +#define GEN4_PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define GEN4_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) + +/* VERTEX_BUFFER_STATE Structure */ +#define VB0_BUFFER_INDEX_SHIFT 27 +#define VB0_VERTEXDATA (0 << 26) +#define VB0_INSTANCEDATA (1 << 26) +#define VB0_BUFFER_PITCH_SHIFT 0 + +/* VERTEX_ELEMENT_STATE Structure */ +#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27 +#define VE0_VALID (1 << 26) +#define VE0_FORMAT_SHIFT 16 +#define VE0_OFFSET_SHIFT 0 +#define VE1_VFCOMPONENT_0_SHIFT 28 +#define VE1_VFCOMPONENT_1_SHIFT 24 +#define VE1_VFCOMPONENT_2_SHIFT 20 +#define VE1_VFCOMPONENT_3_SHIFT 16 +#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0 + +/* 3DPRIMITIVE bits */ +#define GEN4_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15) +#define 
GEN4_3DPRIMITIVE_VERTEX_RANDOM (1 << 15) +/* Primitive types are in gen4_defines.h */ +#define GEN4_3DPRIMITIVE_TOPOLOGY_SHIFT 10 + +#define GEN4_SVG_CTL 0x7400 + +#define GEN4_SVG_CTL_GS_BA (0 << 8) +#define GEN4_SVG_CTL_SS_BA (1 << 8) +#define GEN4_SVG_CTL_IO_BA (2 << 8) +#define GEN4_SVG_CTL_GS_AUB (3 << 8) +#define GEN4_SVG_CTL_IO_AUB (4 << 8) +#define GEN4_SVG_CTL_SIP (5 << 8) + +#define GEN4_SVG_RDATA 0x7404 +#define GEN4_SVG_WORK_CTL 0x7408 + +#define GEN4_VF_CTL 0x7500 + +#define GEN4_VF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN4_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8) +#define GEN4_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8) +#define GEN4_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4) +#define GEN4_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4) +#define GEN4_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3) +#define GEN4_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2) +#define GEN4_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1) +#define GEN4_VF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN4_VF_STRG_VAL 0x7504 +#define GEN4_VF_STR_VL_OVR 0x7508 +#define GEN4_VF_VC_OVR 0x750c +#define GEN4_VF_STR_PSKIP 0x7510 +#define GEN4_VF_MAX_PRIM 0x7514 +#define GEN4_VF_RDATA 0x7518 + +#define GEN4_VS_CTL 0x7600 +#define GEN4_VS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN4_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8) +#define GEN4_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8) +#define GEN4_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8) +#define GEN4_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8) +#define GEN4_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN4_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN4_VS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN4_VS_STRG_VAL 0x7604 +#define GEN4_VS_RDATA 0x7608 + +#define GEN4_SF_CTL 0x7b00 +#define GEN4_SF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8) +#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8) +#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8) +#define 
GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8) +#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8) +#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8) +#define GEN4_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8) +#define GEN4_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8) +#define GEN4_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4) +#define GEN4_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3) +#define GEN4_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN4_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN4_SF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN4_SF_STRG_VAL 0x7b04 +#define GEN4_SF_RDATA 0x7b18 + +#define GEN4_WIZ_CTL 0x7c00 +#define GEN4_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN4_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16 +#define GEN4_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8) +#define GEN4_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8) +#define GEN4_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8) +#define GEN4_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6) +#define GEN4_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5) +#define GEN4_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4) +#define GEN4_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3) +#define GEN4_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN4_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN4_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN4_WIZ_STRG_VAL 0x7c04 +#define GEN4_WIZ_RDATA 0x7c18 + +#define GEN4_TS_CTL 0x7e00 +#define GEN4_TS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN4_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8) +#define GEN4_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8) +#define GEN4_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2) +#define GEN4_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1) +#define GEN4_TS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN4_TS_STRG_VAL 0x7e04 +#define GEN4_TS_RDATA 0x7e08 + +#define GEN4_TD_CTL 0x8000 +#define GEN4_TD_CTL_MUX_SHIFT 8 +#define GEN4_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7) +#define GEN4_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6) +#define 
GEN4_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5) +#define GEN4_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4) +#define GEN4_TD_CTL_BREAKPOINT_ENABLE (1 << 2) +#define GEN4_TD_CTL2 0x8004 +#define GEN4_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28) +#define GEN4_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26) +#define GEN4_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25) +#define GEN4_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16 +#define GEN4_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8) +#define GEN4_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7) +#define GEN4_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6) +#define GEN4_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5) +#define GEN4_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4) +#define GEN4_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3) +#define GEN4_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0) +#define GEN4_TD_VF_VS_EMSK 0x8008 +#define GEN4_TD_GS_EMSK 0x800c +#define GEN4_TD_CLIP_EMSK 0x8010 +#define GEN4_TD_SF_EMSK 0x8014 +#define GEN4_TD_WIZ_EMSK 0x8018 +#define GEN4_TD_0_6_EHTRG_VAL 0x801c +#define GEN4_TD_0_7_EHTRG_VAL 0x8020 +#define GEN4_TD_0_6_EHTRG_MSK 0x8024 +#define GEN4_TD_0_7_EHTRG_MSK 0x8028 +#define GEN4_TD_RDATA 0x802c +#define GEN4_TD_TS_EMSK 0x8030 + +#define GEN4_EU_CTL 0x8800 +#define GEN4_EU_CTL_SELECT_SHIFT 16 +#define GEN4_EU_CTL_DATA_MUX_SHIFT 8 +#define GEN4_EU_ATT_0 0x8810 +#define GEN4_EU_ATT_1 0x8814 +#define GEN4_EU_ATT_DATA_0 0x8820 +#define GEN4_EU_ATT_DATA_1 0x8824 +#define GEN4_EU_ATT_CLR_0 0x8830 +#define GEN4_EU_ATT_CLR_1 0x8834 +#define GEN4_EU_RDATA 0x8840 + +/* 3D state: + */ +#define _3DOP_3DSTATE_PIPELINED 0x0 +#define _3DOP_3DSTATE_NONPIPELINED 0x1 +#define _3DOP_3DCONTROL 0x2 +#define _3DOP_3DPRIMITIVE 0x3 + +#define _3DSTATE_PIPELINED_POINTERS 0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 +#define _3DSTATE_VERTEX_BUFFERS 0x08 +#define _3DSTATE_VERTEX_ELEMENTS 0x09 +#define _3DSTATE_INDEX_BUFFER 0x0A +#define _3DSTATE_VF_STATISTICS 0x0B +#define _3DSTATE_DRAWING_RECTANGLE 0x00 +#define 
_3DSTATE_CONSTANT_COLOR 0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 +#define _3DSTATE_CHROMA_KEY 0x04 +#define _3DSTATE_DEPTH_BUFFER 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 +#define _3DSTATE_LINE_STIPPLE 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 +#define _3DCONTROL 0x00 +#define _3DPRIMITIVE 0x00 + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 +#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 + +#define GEN4_ANISORATIO_2 0 +#define GEN4_ANISORATIO_4 1 +#define GEN4_ANISORATIO_6 2 +#define GEN4_ANISORATIO_8 3 +#define GEN4_ANISORATIO_10 4 +#define GEN4_ANISORATIO_12 5 +#define GEN4_ANISORATIO_14 6 +#define GEN4_ANISORATIO_16 7 + +#define GEN4_BLENDFACTOR_ONE 0x1 +#define GEN4_BLENDFACTOR_SRC_COLOR 0x2 +#define GEN4_BLENDFACTOR_SRC_ALPHA 0x3 +#define GEN4_BLENDFACTOR_DST_ALPHA 0x4 +#define GEN4_BLENDFACTOR_DST_COLOR 0x5 +#define GEN4_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define GEN4_BLENDFACTOR_CONST_COLOR 0x7 +#define GEN4_BLENDFACTOR_CONST_ALPHA 0x8 +#define GEN4_BLENDFACTOR_SRC1_COLOR 0x9 +#define GEN4_BLENDFACTOR_SRC1_ALPHA 0x0A +#define GEN4_BLENDFACTOR_ZERO 0x11 +#define GEN4_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define GEN4_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define GEN4_BLENDFACTOR_INV_DST_ALPHA 0x14 
+#define GEN4_BLENDFACTOR_INV_DST_COLOR 0x15 +#define GEN4_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define GEN4_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define GEN4_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define GEN4_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define GEN4_BLENDFUNCTION_ADD 0 +#define GEN4_BLENDFUNCTION_SUBTRACT 1 +#define GEN4_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define GEN4_BLENDFUNCTION_MIN 3 +#define GEN4_BLENDFUNCTION_MAX 4 + +#define GEN4_ALPHATEST_FORMAT_UNORM8 0 +#define GEN4_ALPHATEST_FORMAT_FLOAT32 1 + +#define GEN4_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define GEN4_CHROMAKEY_REPLACE_BLACK 1 + +#define GEN4_CLIP_API_OGL 0 +#define GEN4_CLIP_API_DX 1 + +#define GEN4_CLIPMODE_NORMAL 0 +#define GEN4_CLIPMODE_CLIP_ALL 1 +#define GEN4_CLIPMODE_CLIP_NON_REJECTED 2 +#define GEN4_CLIPMODE_REJECT_ALL 3 +#define GEN4_CLIPMODE_ACCEPT_ALL 4 + +#define GEN4_CLIP_NDCSPACE 0 +#define GEN4_CLIP_SCREENSPACE 1 + +#define GEN4_COMPAREFUNCTION_ALWAYS 0 +#define GEN4_COMPAREFUNCTION_NEVER 1 +#define GEN4_COMPAREFUNCTION_LESS 2 +#define GEN4_COMPAREFUNCTION_EQUAL 3 +#define GEN4_COMPAREFUNCTION_LEQUAL 4 +#define GEN4_COMPAREFUNCTION_GREATER 5 +#define GEN4_COMPAREFUNCTION_NOTEQUAL 6 +#define GEN4_COMPAREFUNCTION_GEQUAL 7 + +#define GEN4_COVERAGE_PIXELS_HALF 0 +#define GEN4_COVERAGE_PIXELS_1 1 +#define GEN4_COVERAGE_PIXELS_2 2 +#define GEN4_COVERAGE_PIXELS_4 3 + +#define GEN4_CULLMODE_BOTH 0 +#define GEN4_CULLMODE_NONE 1 +#define GEN4_CULLMODE_FRONT 2 +#define GEN4_CULLMODE_BACK 3 + +#define GEN4_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define GEN4_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define GEN4_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define GEN4_DEPTHFORMAT_D32_FLOAT 1 +#define GEN4_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define GEN4_DEPTHFORMAT_D16_UNORM 5 + +#define GEN4_FLOATING_POINT_IEEE_754 0 +#define GEN4_FLOATING_POINT_NON_IEEE_754 1 + +#define GEN4_FRONTWINDING_CW 0 +#define GEN4_FRONTWINDING_CCW 1 + +#define GEN4_INDEX_BYTE 0 +#define GEN4_INDEX_WORD 1 +#define GEN4_INDEX_DWORD 2 + +#define 
GEN4_LOGICOPFUNCTION_CLEAR 0 +#define GEN4_LOGICOPFUNCTION_NOR 1 +#define GEN4_LOGICOPFUNCTION_AND_INVERTED 2 +#define GEN4_LOGICOPFUNCTION_COPY_INVERTED 3 +#define GEN4_LOGICOPFUNCTION_AND_REVERSE 4 +#define GEN4_LOGICOPFUNCTION_INVERT 5 +#define GEN4_LOGICOPFUNCTION_XOR 6 +#define GEN4_LOGICOPFUNCTION_NAND 7 +#define GEN4_LOGICOPFUNCTION_AND 8 +#define GEN4_LOGICOPFUNCTION_EQUIV 9 +#define GEN4_LOGICOPFUNCTION_NOOP 10 +#define GEN4_LOGICOPFUNCTION_OR_INVERTED 11 +#define GEN4_LOGICOPFUNCTION_COPY 12 +#define GEN4_LOGICOPFUNCTION_OR_REVERSE 13 +#define GEN4_LOGICOPFUNCTION_OR 14 +#define GEN4_LOGICOPFUNCTION_SET 15 + +#define GEN4_MAPFILTER_NEAREST 0x0 +#define GEN4_MAPFILTER_LINEAR 0x1 +#define GEN4_MAPFILTER_ANISOTROPIC 0x2 + +#define GEN4_MIPFILTER_NONE 0 +#define GEN4_MIPFILTER_NEAREST 1 +#define GEN4_MIPFILTER_LINEAR 3 + +#define GEN4_POLYGON_FRONT_FACING 0 +#define GEN4_POLYGON_BACK_FACING 1 + +#define GEN4_PREFILTER_ALWAYS 0x0 +#define GEN4_PREFILTER_NEVER 0x1 +#define GEN4_PREFILTER_LESS 0x2 +#define GEN4_PREFILTER_EQUAL 0x3 +#define GEN4_PREFILTER_LEQUAL 0x4 +#define GEN4_PREFILTER_GREATER 0x5 +#define GEN4_PREFILTER_NOTEQUAL 0x6 +#define GEN4_PREFILTER_GEQUAL 0x7 + +#define GEN4_PROVOKING_VERTEX_0 0 +#define GEN4_PROVOKING_VERTEX_1 1 +#define GEN4_PROVOKING_VERTEX_2 2 + +#define GEN4_RASTRULE_UPPER_LEFT 0 +#define GEN4_RASTRULE_UPPER_RIGHT 1 + +#define GEN4_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define GEN4_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define GEN4_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define GEN4_STENCILOP_KEEP 0 +#define GEN4_STENCILOP_ZERO 1 +#define GEN4_STENCILOP_REPLACE 2 +#define GEN4_STENCILOP_INCRSAT 3 +#define GEN4_STENCILOP_DECRSAT 4 +#define GEN4_STENCILOP_INCR 5 +#define GEN4_STENCILOP_DECR 6 +#define GEN4_STENCILOP_INVERT 7 + +#define GEN4_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define GEN4_SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define GEN4_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define 
GEN4_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define GEN4_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define GEN4_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define GEN4_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define GEN4_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define GEN4_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define GEN4_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define GEN4_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define GEN4_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define GEN4_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define GEN4_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define GEN4_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define GEN4_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define GEN4_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define GEN4_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define GEN4_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define GEN4_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define GEN4_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define GEN4_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define GEN4_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define GEN4_SURFACEFORMAT_R32G32_SINT 0x086 +#define GEN4_SURFACEFORMAT_R32G32_UINT 0x087 +#define GEN4_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define GEN4_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define GEN4_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define GEN4_SURFACEFORMAT_R32G32_UNORM 0x08B +#define GEN4_SURFACEFORMAT_R32G32_SNORM 0x08C +#define GEN4_SURFACEFORMAT_R64_FLOAT 0x08D +#define GEN4_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define GEN4_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define GEN4_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define GEN4_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define GEN4_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define GEN4_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define GEN4_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define GEN4_SURFACEFORMAT_R32G32_USCALED 0x096 +#define GEN4_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define GEN4_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define 
GEN4_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define GEN4_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define GEN4_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define GEN4_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define GEN4_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define GEN4_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define GEN4_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define GEN4_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define GEN4_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define GEN4_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define GEN4_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define GEN4_SURFACEFORMAT_R16G16_SINT 0x0CE +#define GEN4_SURFACEFORMAT_R16G16_UINT 0x0CF +#define GEN4_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define GEN4_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define GEN4_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define GEN4_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define GEN4_SURFACEFORMAT_R32_SINT 0x0D6 +#define GEN4_SURFACEFORMAT_R32_UINT 0x0D7 +#define GEN4_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define GEN4_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define GEN4_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define GEN4_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define GEN4_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define GEN4_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define GEN4_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define GEN4_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define GEN4_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define GEN4_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define GEN4_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define GEN4_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define GEN4_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define GEN4_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define GEN4_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define GEN4_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define GEN4_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define GEN4_SURFACEFORMAT_R32_UNORM 0x0F1 +#define GEN4_SURFACEFORMAT_R32_SNORM 0x0F2 +#define GEN4_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define GEN4_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define 
GEN4_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define GEN4_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define GEN4_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define GEN4_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define GEN4_SURFACEFORMAT_R32_USCALED 0x0F9 +#define GEN4_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define GEN4_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define GEN4_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define GEN4_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define GEN4_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define GEN4_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define GEN4_SURFACEFORMAT_R8G8_UNORM 0x106 +#define GEN4_SURFACEFORMAT_R8G8_SNORM 0x107 +#define GEN4_SURFACEFORMAT_R8G8_SINT 0x108 +#define GEN4_SURFACEFORMAT_R8G8_UINT 0x109 +#define GEN4_SURFACEFORMAT_R16_UNORM 0x10A +#define GEN4_SURFACEFORMAT_R16_SNORM 0x10B +#define GEN4_SURFACEFORMAT_R16_SINT 0x10C +#define GEN4_SURFACEFORMAT_R16_UINT 0x10D +#define GEN4_SURFACEFORMAT_R16_FLOAT 0x10E +#define GEN4_SURFACEFORMAT_I16_UNORM 0x111 +#define GEN4_SURFACEFORMAT_L16_UNORM 0x112 +#define GEN4_SURFACEFORMAT_A16_UNORM 0x113 +#define GEN4_SURFACEFORMAT_L8A8_UNORM 0x114 +#define GEN4_SURFACEFORMAT_I16_FLOAT 0x115 +#define GEN4_SURFACEFORMAT_L16_FLOAT 0x116 +#define GEN4_SURFACEFORMAT_A16_FLOAT 0x117 +#define GEN4_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define GEN4_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define GEN4_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define GEN4_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define GEN4_SURFACEFORMAT_R8G8_USCALED 0x11D +#define GEN4_SURFACEFORMAT_R16_SSCALED 0x11E +#define GEN4_SURFACEFORMAT_R16_USCALED 0x11F +#define GEN4_SURFACEFORMAT_R8_UNORM 0x140 +#define GEN4_SURFACEFORMAT_R8_SNORM 0x141 +#define GEN4_SURFACEFORMAT_R8_SINT 0x142 +#define GEN4_SURFACEFORMAT_R8_UINT 0x143 +#define GEN4_SURFACEFORMAT_A8_UNORM 0x144 +#define GEN4_SURFACEFORMAT_I8_UNORM 0x145 +#define GEN4_SURFACEFORMAT_L8_UNORM 0x146 +#define GEN4_SURFACEFORMAT_P4A4_UNORM 0x147 +#define GEN4_SURFACEFORMAT_A4P4_UNORM 0x148 +#define 
GEN4_SURFACEFORMAT_R8_SSCALED 0x149 +#define GEN4_SURFACEFORMAT_R8_USCALED 0x14A +#define GEN4_SURFACEFORMAT_R1_UINT 0x181 +#define GEN4_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define GEN4_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define GEN4_SURFACEFORMAT_BC1_UNORM 0x186 +#define GEN4_SURFACEFORMAT_BC2_UNORM 0x187 +#define GEN4_SURFACEFORMAT_BC3_UNORM 0x188 +#define GEN4_SURFACEFORMAT_BC4_UNORM 0x189 +#define GEN4_SURFACEFORMAT_BC5_UNORM 0x18A +#define GEN4_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define GEN4_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define GEN4_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define GEN4_SURFACEFORMAT_MONO8 0x18E +#define GEN4_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define GEN4_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define GEN4_SURFACEFORMAT_DXT1_RGB 0x191 +#define GEN4_SURFACEFORMAT_FXT1 0x192 +#define GEN4_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define GEN4_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define GEN4_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define GEN4_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define GEN4_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define GEN4_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define GEN4_SURFACEFORMAT_BC4_SNORM 0x199 +#define GEN4_SURFACEFORMAT_BC5_SNORM 0x19A +#define GEN4_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define GEN4_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define GEN4_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define GEN4_SURFACEFORMAT_R16G16B16_USCALED 0x19F + +#define GEN4_SURFACERETURNFORMAT_FLOAT32 0 +#define GEN4_SURFACERETURNFORMAT_S1 1 + +#define GEN4_SURFACE_1D 0 +#define GEN4_SURFACE_2D 1 +#define GEN4_SURFACE_3D 2 +#define GEN4_SURFACE_CUBE 3 +#define GEN4_SURFACE_BUFFER 4 +#define GEN4_SURFACE_NULL 7 + +#define GEN4_BORDER_COLOR_MODE_DEFAULT 0 +#define GEN4_BORDER_COLOR_MODE_LEGACY 1 + +#define GEN4_TEXCOORDMODE_WRAP 0 +#define GEN4_TEXCOORDMODE_MIRROR 1 +#define GEN4_TEXCOORDMODE_CLAMP 2 +#define GEN4_TEXCOORDMODE_CUBE 3 +#define GEN4_TEXCOORDMODE_CLAMP_BORDER 4 +#define GEN4_TEXCOORDMODE_MIRROR_ONCE 5 + +#define GEN4_THREAD_PRIORITY_NORMAL 0 
+#define GEN4_THREAD_PRIORITY_HIGH 1 + +#define GEN4_TILEWALK_XMAJOR 0 +#define GEN4_TILEWALK_YMAJOR 1 + +#define GEN4_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define GEN4_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +#define GEN4_VERTEXBUFFER_ACCESS_VERTEXDATA 0 +#define GEN4_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 + +#define GEN4_VFCOMPONENT_NOSTORE 0 +#define GEN4_VFCOMPONENT_STORE_SRC 1 +#define GEN4_VFCOMPONENT_STORE_0 2 +#define GEN4_VFCOMPONENT_STORE_1_FLT 3 +#define GEN4_VFCOMPONENT_STORE_1_INT 4 +#define GEN4_VFCOMPONENT_STORE_VID 5 +#define GEN4_VFCOMPONENT_STORE_IID 6 +#define GEN4_VFCOMPONENT_STORE_PID 7 + + + +/* Execution Unit (EU) defines + */ + +#define GEN4_ALIGN_1 0 +#define GEN4_ALIGN_16 1 + +#define GEN4_ADDRESS_DIRECT 0 +#define GEN4_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define GEN4_CHANNEL_X 0 +#define GEN4_CHANNEL_Y 1 +#define GEN4_CHANNEL_Z 2 +#define GEN4_CHANNEL_W 3 + +#define GEN4_COMPRESSION_NONE 0 +#define GEN4_COMPRESSION_2NDHALF 1 +#define GEN4_COMPRESSION_COMPRESSED 2 + +#define GEN4_CONDITIONAL_NONE 0 +#define GEN4_CONDITIONAL_Z 1 +#define GEN4_CONDITIONAL_NZ 2 +#define GEN4_CONDITIONAL_EQ 1 /* Z */ +#define GEN4_CONDITIONAL_NEQ 2 /* NZ */ +#define GEN4_CONDITIONAL_G 3 +#define GEN4_CONDITIONAL_GE 4 +#define GEN4_CONDITIONAL_L 5 +#define GEN4_CONDITIONAL_LE 6 +#define GEN4_CONDITIONAL_C 7 +#define GEN4_CONDITIONAL_O 8 + +#define GEN4_DEBUG_NONE 0 +#define GEN4_DEBUG_BREAKPOINT 1 + +#define GEN4_DEPENDENCY_NORMAL 0 +#define GEN4_DEPENDENCY_NOTCLEARED 1 +#define GEN4_DEPENDENCY_NOTCHECKED 2 +#define GEN4_DEPENDENCY_DISABLE 3 + +#define GEN4_EXECUTE_1 0 +#define GEN4_EXECUTE_2 1 +#define GEN4_EXECUTE_4 2 +#define GEN4_EXECUTE_8 3 +#define GEN4_EXECUTE_16 4 +#define GEN4_EXECUTE_32 5 + +#define GEN4_HORIZONTAL_STRIDE_0 0 +#define GEN4_HORIZONTAL_STRIDE_1 1 +#define GEN4_HORIZONTAL_STRIDE_2 2 +#define GEN4_HORIZONTAL_STRIDE_4 3 + +#define GEN4_INSTRUCTION_NORMAL 0 +#define GEN4_INSTRUCTION_SATURATE 1 + +#define GEN4_MASK_ENABLE 0 +#define 
GEN4_MASK_DISABLE 1 + +#define GEN4_OPCODE_MOV 1 +#define GEN4_OPCODE_SEL 2 +#define GEN4_OPCODE_NOT 4 +#define GEN4_OPCODE_AND 5 +#define GEN4_OPCODE_OR 6 +#define GEN4_OPCODE_XOR 7 +#define GEN4_OPCODE_SHR 8 +#define GEN4_OPCODE_SHL 9 +#define GEN4_OPCODE_RSR 10 +#define GEN4_OPCODE_RSL 11 +#define GEN4_OPCODE_ASR 12 +#define GEN4_OPCODE_CMP 16 +#define GEN4_OPCODE_JMPI 32 +#define GEN4_OPCODE_IF 34 +#define GEN4_OPCODE_IFF 35 +#define GEN4_OPCODE_ELSE 36 +#define GEN4_OPCODE_ENDIF 37 +#define GEN4_OPCODE_DO 38 +#define GEN4_OPCODE_WHILE 39 +#define GEN4_OPCODE_BREAK 40 +#define GEN4_OPCODE_CONTINUE 41 +#define GEN4_OPCODE_HALT 42 +#define GEN4_OPCODE_MSAVE 44 +#define GEN4_OPCODE_MRESTORE 45 +#define GEN4_OPCODE_PUSH 46 +#define GEN4_OPCODE_POP 47 +#define GEN4_OPCODE_WAIT 48 +#define GEN4_OPCODE_SEND 49 +#define GEN4_OPCODE_ADD 64 +#define GEN4_OPCODE_MUL 65 +#define GEN4_OPCODE_AVG 66 +#define GEN4_OPCODE_FRC 67 +#define GEN4_OPCODE_RNDU 68 +#define GEN4_OPCODE_RNDD 69 +#define GEN4_OPCODE_RNDE 70 +#define GEN4_OPCODE_RNDZ 71 +#define GEN4_OPCODE_MAC 72 +#define GEN4_OPCODE_MACH 73 +#define GEN4_OPCODE_LZD 74 +#define GEN4_OPCODE_SAD2 80 +#define GEN4_OPCODE_SADA2 81 +#define GEN4_OPCODE_DP4 84 +#define GEN4_OPCODE_DPH 85 +#define GEN4_OPCODE_DP3 86 +#define GEN4_OPCODE_DP2 87 +#define GEN4_OPCODE_DPA2 88 +#define GEN4_OPCODE_LINE 89 +#define GEN4_OPCODE_NOP 126 + +#define GEN4_PREDICATE_NONE 0 +#define GEN4_PREDICATE_NORMAL 1 +#define GEN4_PREDICATE_ALIGN1_ANYV 2 +#define GEN4_PREDICATE_ALIGN1_ALLV 3 +#define GEN4_PREDICATE_ALIGN1_ANY2H 4 +#define GEN4_PREDICATE_ALIGN1_ALL2H 5 +#define GEN4_PREDICATE_ALIGN1_ANY4H 6 +#define GEN4_PREDICATE_ALIGN1_ALL4H 7 +#define GEN4_PREDICATE_ALIGN1_ANY8H 8 +#define GEN4_PREDICATE_ALIGN1_ALL8H 9 +#define GEN4_PREDICATE_ALIGN1_ANY16H 10 +#define GEN4_PREDICATE_ALIGN1_ALL16H 11 +#define GEN4_PREDICATE_ALIGN16_REPLICATE_X 2 +#define GEN4_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define GEN4_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define 
GEN4_PREDICATE_ALIGN16_REPLICATE_W 5 +#define GEN4_PREDICATE_ALIGN16_ANY4H 6 +#define GEN4_PREDICATE_ALIGN16_ALL4H 7 + +#define GEN4_ARCHITECTURE_REGISTER_FILE 0 +#define GEN4_GENERAL_REGISTER_FILE 1 +#define GEN4_MESSAGE_REGISTER_FILE 2 +#define GEN4_IMMEDIATE_VALUE 3 + +#define GEN4_REGISTER_TYPE_UD 0 +#define GEN4_REGISTER_TYPE_D 1 +#define GEN4_REGISTER_TYPE_UW 2 +#define GEN4_REGISTER_TYPE_W 3 +#define GEN4_REGISTER_TYPE_UB 4 +#define GEN4_REGISTER_TYPE_B 5 +#define GEN4_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? */ +#define GEN4_REGISTER_TYPE_HF 6 +#define GEN4_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define GEN4_REGISTER_TYPE_F 7 + +#define GEN4_ARF_NULL 0x00 +#define GEN4_ARF_ADDRESS 0x10 +#define GEN4_ARF_ACCUMULATOR 0x20 +#define GEN4_ARF_FLAG 0x30 +#define GEN4_ARF_MASK 0x40 +#define GEN4_ARF_MASK_STACK 0x50 +#define GEN4_ARF_MASK_STACK_DEPTH 0x60 +#define GEN4_ARF_STATE 0x70 +#define GEN4_ARF_CONTROL 0x80 +#define GEN4_ARF_NOTIFICATION_COUNT 0x90 +#define GEN4_ARF_IP 0xA0 + +#define GEN4_AMASK 0 +#define GEN4_IMASK 1 +#define GEN4_LMASK 2 +#define GEN4_CMASK 3 + + + +#define GEN4_THREAD_NORMAL 0 +#define GEN4_THREAD_ATOMIC 1 +#define GEN4_THREAD_SWITCH 2 + +#define GEN4_VERTICAL_STRIDE_0 0 +#define GEN4_VERTICAL_STRIDE_1 1 +#define GEN4_VERTICAL_STRIDE_2 2 +#define GEN4_VERTICAL_STRIDE_4 3 +#define GEN4_VERTICAL_STRIDE_8 4 +#define GEN4_VERTICAL_STRIDE_16 5 +#define GEN4_VERTICAL_STRIDE_32 6 +#define GEN4_VERTICAL_STRIDE_64 7 +#define GEN4_VERTICAL_STRIDE_128 8 +#define GEN4_VERTICAL_STRIDE_256 9 +#define GEN4_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define GEN4_WIDTH_1 0 +#define GEN4_WIDTH_2 1 +#define GEN4_WIDTH_4 2 +#define GEN4_WIDTH_8 3 +#define GEN4_WIDTH_16 4 + +#define GEN4_STATELESS_BUFFER_BOUNDARY_1K 0 +#define GEN4_STATELESS_BUFFER_BOUNDARY_2K 1 +#define GEN4_STATELESS_BUFFER_BOUNDARY_4K 2 +#define GEN4_STATELESS_BUFFER_BOUNDARY_8K 3 +#define GEN4_STATELESS_BUFFER_BOUNDARY_16K 4 +#define 
GEN4_STATELESS_BUFFER_BOUNDARY_32K	5
#define GEN4_STATELESS_BUFFER_BOUNDARY_64K	6
#define GEN4_STATELESS_BUFFER_BOUNDARY_128K	7
#define GEN4_STATELESS_BUFFER_BOUNDARY_256K	8
#define GEN4_STATELESS_BUFFER_BOUNDARY_512K	9
#define GEN4_STATELESS_BUFFER_BOUNDARY_1M	10
#define GEN4_STATELESS_BUFFER_BOUNDARY_2M	11

#define GEN4_POLYGON_FACING_FRONT	0
#define GEN4_POLYGON_FACING_BACK	1

/* Shared-function IDs for the "msg_target" field of EU send messages. */
#define GEN4_MESSAGE_TARGET_NULL	0
#define GEN4_MESSAGE_TARGET_MATH	1
#define GEN4_MESSAGE_TARGET_SAMPLER	2
#define GEN4_MESSAGE_TARGET_GATEWAY	3
#define GEN4_MESSAGE_TARGET_DATAPORT_READ	4
#define GEN4_MESSAGE_TARGET_DATAPORT_WRITE	5
#define GEN4_MESSAGE_TARGET_URB	6
#define GEN4_MESSAGE_TARGET_THREAD_SPAWNER	7

#define GEN4_SAMPLER_RETURN_FORMAT_FLOAT32	0
#define GEN4_SAMPLER_RETURN_FORMAT_UINT32	2
#define GEN4_SAMPLER_RETURN_FORMAT_SINT32	3

/* Sampler message types.  Several share an encoding; they are
 * distinguished by the SIMD mode / message length of the send. */
#define GEN4_SAMPLER_MESSAGE_SIMD8_SAMPLE	0
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE	0
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS	0
#define GEN4_SAMPLER_MESSAGE_SIMD8_KILLPIX	1
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD	1
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD	1
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS	2
#define GEN4_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS	2
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE	0
#define GEN4_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE	2
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_RESINFO	2
#define GEN4_SAMPLER_MESSAGE_SIMD8_RESINFO	2
#define GEN4_SAMPLER_MESSAGE_SIMD16_RESINFO	2
#define GEN4_SAMPLER_MESSAGE_SIMD4X2_LD	3
#define GEN4_SAMPLER_MESSAGE_SIMD8_LD	3
#define GEN4_SAMPLER_MESSAGE_SIMD16_LD	3

/* Dataport message control: OWord block sizes for block reads/writes. */
#define GEN4_DATAPORT_OWORD_BLOCK_1_OWORDLOW	0
#define GEN4_DATAPORT_OWORD_BLOCK_1_OWORDHIGH	1
#define GEN4_DATAPORT_OWORD_BLOCK_2_OWORDS	2
#define GEN4_DATAPORT_OWORD_BLOCK_4_OWORDS	3
#define GEN4_DATAPORT_OWORD_BLOCK_8_OWORDS	4

#define GEN4_DATAPORT_OWORD_DUAL_BLOCK_1OWORD	0
#define GEN4_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS	2

#define GEN4_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS	2
#define GEN4_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS	3

#define GEN4_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ	0
#define GEN4_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ	1
#define GEN4_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ	2
#define GEN4_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ	3

/* Which cache services a dataport read. */
#define GEN4_DATAPORT_READ_TARGET_DATA_CACHE	0
#define GEN4_DATAPORT_READ_TARGET_RENDER_CACHE	1
#define GEN4_DATAPORT_READ_TARGET_SAMPLER_CACHE	2

#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE	0
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED	1
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01	2
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23	3
#define GEN4_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01	4

#define GEN4_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE	0
#define GEN4_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE	1
#define GEN4_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE	2
#define GEN4_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE	3
#define GEN4_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE	4
#define GEN4_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE	5
#define GEN4_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE	7

/* Extended math unit function encodings.  The "was N" notes record the
 * pre-production encodings these values replaced. */
#define GEN4_MATH_FUNCTION_INV	1
#define GEN4_MATH_FUNCTION_LOG	2
#define GEN4_MATH_FUNCTION_EXP	3
#define GEN4_MATH_FUNCTION_SQRT	4
#define GEN4_MATH_FUNCTION_RSQ	5
#define GEN4_MATH_FUNCTION_SIN	6 /* was 7 */
#define GEN4_MATH_FUNCTION_COS	7 /* was 8 */
#define GEN4_MATH_FUNCTION_SINCOS	8 /* was 6 */
#define GEN4_MATH_FUNCTION_TAN	9
#define GEN4_MATH_FUNCTION_POW	10
#define GEN4_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER	11
#define GEN4_MATH_FUNCTION_INT_DIV_QUOTIENT	12
#define GEN4_MATH_FUNCTION_INT_DIV_REMAINDER	13

#define GEN4_MATH_INTEGER_UNSIGNED	0
#define GEN4_MATH_INTEGER_SIGNED	1

#define GEN4_MATH_PRECISION_FULL	0
#define GEN4_MATH_PRECISION_PARTIAL	1

#define GEN4_MATH_SATURATE_NONE	0
#define GEN4_MATH_SATURATE_SATURATE	1

#define GEN4_MATH_DATA_VECTOR	0
#define GEN4_MATH_DATA_SCALAR	1

#define GEN4_URB_OPCODE_WRITE	0

#define GEN4_URB_SWIZZLE_NONE	0
#define GEN4_URB_SWIZZLE_INTERLEAVE	1
#define GEN4_URB_SWIZZLE_TRANSPOSE	2

/* Per-thread scratch space size encodings (log2-style, 1K..2M). */
#define GEN4_SCRATCH_SPACE_SIZE_1K	0
#define GEN4_SCRATCH_SPACE_SIZE_2K	1
#define GEN4_SCRATCH_SPACE_SIZE_4K	2
#define GEN4_SCRATCH_SPACE_SIZE_8K	3
#define GEN4_SCRATCH_SPACE_SIZE_16K	4
#define GEN4_SCRATCH_SPACE_SIZE_32K	5
#define GEN4_SCRATCH_SPACE_SIZE_64K	6
#define GEN4_SCRATCH_SPACE_SIZE_128K	7
#define GEN4_SCRATCH_SPACE_SIZE_256K	8
#define GEN4_SCRATCH_SPACE_SIZE_512K	9
#define GEN4_SCRATCH_SPACE_SIZE_1M	10
#define GEN4_SCRATCH_SPACE_SIZE_2M	11




/* Command opcodes, packed as (pipeline:type:opcode) in the upper bits of
 * the command dword header. */
#define CMD_URB_FENCE	0x6000
#define CMD_CONST_BUFFER_STATE	0x6001
#define CMD_CONST_BUFFER	0x6002

#define CMD_STATE_BASE_ADDRESS	0x6101
#define CMD_STATE_INSN_POINTER	0x6102
#define CMD_PIPELINE_SELECT	0x6104

#define CMD_PIPELINED_STATE_POINTERS	0x7800
#define CMD_BINDING_TABLE_PTRS	0x7801
#define CMD_VERTEX_BUFFER	0x7808
#define CMD_VERTEX_ELEMENT	0x7809
#define CMD_INDEX_BUFFER	0x780a
#define CMD_VF_STATISTICS	0x780b

#define CMD_DRAW_RECT	0x7900
#define CMD_BLEND_CONSTANT_COLOR	0x7901
#define CMD_CHROMA_KEY	0x7904
#define CMD_DEPTH_BUFFER	0x7905
#define CMD_POLY_STIPPLE_OFFSET	0x7906
#define CMD_POLY_STIPPLE_PATTERN	0x7907
#define CMD_LINE_STIPPLE_PATTERN	0x7908
/* NOTE(review): same opcode as CMD_LINE_STIPPLE_PATTERN above.  The 965
 * PRM (and Mesa's brw_defines.h) give 3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP
 * opcode 0x7909 — this looks like a copy-paste slip; verify before use. */
#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP	0x7908

#define CMD_PIPE_CONTROL	0x7a00

#define CMD_3D_PRIM	0x7b00

#define CMD_MI_FLUSH	0x0200


/* Various values from the R0 vertex header:
 */
#define R02_PRIM_END	0x1
#define R02_PRIM_START	0x2

/* media pipeline */

#define GEN4_VFE_MODE_GENERIC	0x0
#define GEN4_VFE_MODE_VLD_MPEG2	0x1
#define GEN4_VFE_MODE_IS	0x2
#define GEN4_VFE_MODE_AVC_MC	0x4
#define GEN4_VFE_MODE_AVC_IT	0x7
#define GEN4_VFE_MODE_VC1_IT	0xB

#define GEN4_VFE_DEBUG_COUNTER_FREE	0
#define GEN4_VFE_DEBUG_COUNTER_FROZEN	1
#define GEN4_VFE_DEBUG_COUNTER_ONCE	2
#define GEN4_VFE_DEBUG_COUNTER_ALWAYS	3

/* VLD_STATE */
#define GEN4_MPEG_TOP_FIELD	1
#define GEN4_MPEG_BOTTOM_FIELD	2
#define GEN4_MPEG_FRAME	3
#define GEN4_MPEG_QSCALE_LINEAR	0
#define GEN4_MPEG_QSCALE_NONLINEAR	1
#define GEN4_MPEG_ZIGZAG_SCAN	0
#define GEN4_MPEG_ALTER_VERTICAL_SCAN	1
#define GEN4_MPEG_I_PICTURE	1
#define GEN4_MPEG_P_PICTURE	2
#define GEN4_MPEG_B_PICTURE	3

/* Command packets:
 *
 * All command packets start with a 32-bit header dword: a length (in
 * dwords minus two, for most packets) and the opcode from the CMD_*
 * table above.  The bitfield structs below mirror the exact hardware
 * dword layouts, so field order and widths must not be changed.
 */
struct header
{
	unsigned int length:16;
	unsigned int opcode:16;
};


/* Header viewed either as bitfields or as a raw dword. */
union header_union
{
	struct header bits;
	unsigned int dword;
};

struct gen4_3d_control
{
	struct
	{
		unsigned int length:8;
		unsigned int notify_enable:1;
		unsigned int pad:3;
		unsigned int wc_flush_enable:1;
		unsigned int depth_stall_enable:1;
		unsigned int operation:2;
		unsigned int opcode:16;
	} header;

	struct
	{
		unsigned int pad:2;
		unsigned int dest_addr_type:1;
		unsigned int dest_addr:29;	/* destination for the write-back, if any */
	} dest;

	unsigned int dword2;
	unsigned int dword3;
};


/* 3DPRIMITIVE: kicks off rendering of a batch of vertices. */
struct gen4_3d_primitive
{
	struct
	{
		unsigned int length:8;
		unsigned int pad:2;
		unsigned int topology:5;	/* primitive type (tri list/strip, ...) */
		unsigned int indexed:1;		/* random (indexed) vs sequential access */
		unsigned int opcode:16;
	} header;

	unsigned int verts_per_instance;
	unsigned int start_vert_location;
	unsigned int instance_count;
	unsigned int start_instance_location;
	unsigned int base_vert_location;
};

/* These seem to be passed around as function args, so it works out
 * better to keep them as #defines:
 */
#define GEN4_FLUSH_READ_CACHE	0x1
#define GEN4_FLUSH_STATE_CACHE	0x2
#define GEN4_INHIBIT_FLUSH_RENDER_CACHE	0x4
#define GEN4_FLUSH_SNAPSHOT_COUNTERS	0x8

struct gen4_mi_flush
{
	unsigned int flags:4;	/* GEN4_FLUSH_* bits above */
	unsigned int pad:12;
	unsigned int opcode:16;
};

struct gen4_vf_statistics
{
	unsigned int statistics_enable:1;
	unsigned int pad:15;
	unsigned int opcode:16;
};



/* Pointers to the per-stage binding tables (surface state lists). */
struct gen4_binding_table_pointers
{
	struct header header;
	unsigned int vs;
	unsigned int gs;
	unsigned int clp;
	unsigned int sf;
	unsigned int wm;
};


struct gen4_blend_constant_color
{
	struct header header;
	float blend_constant_color[4];	/* RGBA constant for blending */
};


struct gen4_depthbuffer
{
	union header_union header;

	union {
		struct {
			unsigned int pitch:18;
			unsigned int format:3;
			unsigned int pad:4;
			unsigned int depth_offset_disable:1;
			unsigned int tile_walk:1;
			unsigned int tiled_surface:1;
			unsigned int pad2:1;
			unsigned int surface_type:3;
		} bits;
		unsigned int dword;
	} dword1;

	unsigned int dword2_base_addr;

	union {
		struct {
			unsigned int pad:1;
			unsigned int mipmap_layout:1;
			unsigned int lod:4;
			unsigned int width:13;
			unsigned int height:13;
		} bits;
		unsigned int dword;
	} dword3;

	union {
		struct {
			unsigned int pad:12;
			unsigned int min_array_element:9;
			unsigned int depth:11;
		} bits;
		unsigned int dword;
	} dword4;
};

/* 3DSTATE_DRAWING_RECTANGLE: clip/draw rectangle plus origin. */
struct gen4_drawrect
{
	struct header header;
	unsigned int xmin:16;
	unsigned int ymin:16;
	unsigned int xmax:16;
	unsigned int ymax:16;
	unsigned int xorg:16;
	unsigned int yorg:16;
};




struct gen4_global_depth_offset_clamp
{
	struct header header;
	float depth_offset_clamp;
};

struct gen4_indexbuffer
{
	union {
		struct
		{
			unsigned int length:8;
			unsigned int index_format:2;	/* index size (byte/word/dword) */
			unsigned int cut_index_enable:1;
			unsigned int pad:5;
			unsigned int opcode:16;
		} bits;
		unsigned int dword;

	} header;

	unsigned int buffer_start;
	unsigned int buffer_end;
};


struct gen4_line_stipple
{
	struct header header;

	struct
	{
		unsigned int pattern:16;
		unsigned int pad:16;
	} bits0;

	struct
	{
		unsigned int repeat_count:9;
		unsigned int pad:7;
		unsigned int inverse_repeat_count:16;	/* fixed-point 1/repeat_count */
	} bits1;
};


/* 3DSTATE_PIPELINED_POINTERS: offsets of the fixed-function unit state
 * blocks, relative to the general state base address. */
struct gen4_pipelined_state_pointers
{
	struct header header;

	struct {
		unsigned int pad:5;
		unsigned int offset:27;
	} vs;

	struct
	{
		unsigned int enable:1;
		unsigned int pad:4;
		unsigned int offset:27;
	} gs;

	struct
	{
		unsigned int enable:1;
		unsigned int pad:4;
		unsigned int offset:27;
	} clp;

	struct
	{
		unsigned int pad:5;
		unsigned int offset:27;
	} sf;

	struct
	{
		unsigned int pad:5;
		unsigned int offset:27;
	} wm;

	struct
	{
		unsigned int pad:5;
		unsigned int offset:27; /* KW: check me! */
	} cc;
};


struct gen4_polygon_stipple_offset
{
	struct header header;

	struct {
		unsigned int y_offset:5;
		unsigned int pad:3;
		unsigned int x_offset:5;
		unsigned int pad0:19;
	} bits0;
};



/* 32x32 polygon stipple pattern, one row per dword. */
struct gen4_polygon_stipple
{
	struct header header;
	unsigned int stipple[32];
};



/* PIPELINE_SELECT: choose between the 3D and media pipelines. */
struct gen4_pipeline_select
{
	struct
	{
		unsigned int pipeline_select:1;
		unsigned int pad:15;
		unsigned int opcode:16;
	} header;
};


struct gen4_pipe_control
{
	struct
	{
		unsigned int length:8;
		unsigned int notify_enable:1;
		unsigned int pad:2;
		unsigned int instruction_state_cache_flush_enable:1;
		unsigned int write_cache_flush_enable:1;
		unsigned int depth_stall_enable:1;
		unsigned int post_sync_operation:2;

		unsigned int opcode:16;
	} header;

	struct
	{
		unsigned int pad:2;
		unsigned int dest_addr_type:1;
		unsigned int dest_addr:29;	/* address for the post-sync write */
	} bits1;

	unsigned int data0;
	unsigned int data1;
};


/* URB_FENCE: partitions URB space between the fixed-function units.
 * The *_realloc bits mark which fences are being changed. */
struct gen4_urb_fence
{
	struct
	{
		unsigned int length:8;
		unsigned int vs_realloc:1;
		unsigned int gs_realloc:1;
		unsigned int clp_realloc:1;
		unsigned int sf_realloc:1;
		unsigned int vfe_realloc:1;
		unsigned int cs_realloc:1;
		unsigned int pad:2;
		unsigned int opcode:16;
	} header;

	struct
	{
		unsigned int vs_fence:10;
		unsigned int gs_fence:10;
		unsigned int clp_fence:10;
		unsigned int pad:2;
	} bits0;

	struct
	{
		unsigned int sf_fence:10;
		unsigned int vf_fence:10;
		unsigned int cs_fence:10;
		unsigned int pad:2;
	} bits1;
};

struct gen4_constant_buffer_state /* previously gen4_command_streamer */
{
	struct header header;

	struct
	{
		unsigned int nr_urb_entries:3;
		unsigned int pad:1;
		unsigned int urb_entry_size:5;
		unsigned int pad0:23;
	} bits0;
};

struct gen4_constant_buffer
{
	struct
	{
		unsigned int length:8;
		unsigned int valid:1;
		unsigned int pad:7;
		unsigned int opcode:16;
	} header;

	struct
	{
		unsigned int buffer_length:6;
		unsigned int buffer_address:26;
	} bits0;
};

/* STATE_BASE_ADDRESS: base addresses against which the various pipelined
 * state offsets are resolved.  Each dword carries its own modify-enable
 * bit so bases can be updated selectively. */
struct gen4_state_base_address
{
	struct header header;

	struct
	{
		unsigned int modify_enable:1;
		unsigned int pad:4;
		unsigned int general_state_address:27;
	} bits0;

	struct
	{
		unsigned int modify_enable:1;
		unsigned int pad:4;
		unsigned int surface_state_address:27;
	} bits1;

	struct
	{
		unsigned int modify_enable:1;
		unsigned int pad:4;
		unsigned int indirect_object_state_address:27;
	} bits2;

	struct
	{
		unsigned int modify_enable:1;
		unsigned int pad:11;
		unsigned int general_state_upper_bound:20;
	} bits3;

	struct
	{
		unsigned int modify_enable:1;
		unsigned int pad:11;
		unsigned int indirect_object_state_upper_bound:20;
	} bits4;
};

struct gen4_state_prefetch
{
	struct header header;

	struct
	{
		unsigned int prefetch_count:3;
		unsigned int pad:3;
		unsigned int prefetch_pointer:26;
	} bits0;
};

struct gen4_system_instruction_pointer
{
	struct header header;

	struct
	{
		unsigned int pad:4;
		unsigned int system_instruction_pointer:28;
	} bits0;
};




/* State structs for the various fixed function units:
 *
 * thread0..thread3 are the common thread-setup dwords shared (with minor
 * per-unit variations) by the VS/GS/CLIP/SF/WM unit state below.
 */


struct thread0
{
	unsigned int pad0:1;
	unsigned int grf_reg_count:3;
	unsigned int pad1:2;
	unsigned int kernel_start_pointer:26;	/* offset of the kernel program */
};

struct thread1
{
	unsigned int ext_halt_exception_enable:1;
	unsigned int sw_exception_enable:1;
	unsigned int mask_stack_exception_enable:1;
	unsigned int timeout_exception_enable:1;
	unsigned int illegal_op_exception_enable:1;
	unsigned int pad0:3;
	unsigned int depth_coef_urb_read_offset:6;	/* WM only */
	unsigned int pad1:2;
	unsigned int floating_point_mode:1;
	unsigned int thread_priority:1;
	unsigned int binding_table_entry_count:8;
	unsigned int pad3:5;
	unsigned int single_program_flow:1;
};

struct thread2
{
	unsigned int per_thread_scratch_space:4;	/* GEN4_SCRATCH_SPACE_SIZE_* */
	unsigned int pad0:6;
	unsigned int scratch_space_base_pointer:22;
};


struct thread3
{
	unsigned int dispatch_grf_start_reg:4;
	unsigned int urb_entry_read_offset:6;
	unsigned int pad0:1;
	unsigned int urb_entry_read_length:6;
	unsigned int pad1:1;
	unsigned int const_urb_entry_read_offset:6;
	unsigned int pad2:1;
	unsigned int const_urb_entry_read_length:6;
	unsigned int pad3:1;
};



struct gen4_clip_unit_state
{
	struct thread0 thread0;
	struct thread1 thread1;
	struct thread2 thread2;
	struct thread3 thread3;

	struct
	{
		unsigned int pad0:9;
		unsigned int gs_output_stats:1; /* not always */
		unsigned int stats_enable:1;
		unsigned int nr_urb_entries:7;
		unsigned int pad1:1;
		unsigned int urb_entry_allocation_size:5;
		unsigned int pad2:1;
		unsigned int max_threads:6; /* may be less */
		unsigned int pad3:1;
	} thread4;

	struct
	{
		unsigned int pad0:13;
		unsigned int clip_mode:3;
		unsigned int userclip_enable_flags:8;
		unsigned int userclip_must_clip:1;
		unsigned int pad1:1;
		unsigned int guard_band_enable:1;
		unsigned int viewport_z_clip_enable:1;
		unsigned int viewport_xy_clip_enable:1;
		unsigned int vertex_position_space:1;
		unsigned int api_mode:1;
		unsigned int pad2:1;
	} clip5;

	struct
	{
		unsigned int pad0:5;
		unsigned int clipper_viewport_state_ptr:27;
	} clip6;


	/* Guard-band extents used when guard_band_enable is set. */
	float viewport_xmin;
	float viewport_xmax;
	float viewport_ymin;
	float viewport_ymax;
};



/* Color calculator (blend / depth / stencil / alpha test) state.
 * "bf_" fields are the back-face stencil variants. */
struct gen4_cc_unit_state
{
	struct
	{
		unsigned int pad0:3;
		unsigned int bf_stencil_pass_depth_pass_op:3;
		unsigned int bf_stencil_pass_depth_fail_op:3;
		unsigned int bf_stencil_fail_op:3;
		unsigned int bf_stencil_func:3;
		unsigned int bf_stencil_enable:1;
		unsigned int pad1:2;
		unsigned int stencil_write_enable:1;
		unsigned int stencil_pass_depth_pass_op:3;
		unsigned int stencil_pass_depth_fail_op:3;
		unsigned int stencil_fail_op:3;
		unsigned int stencil_func:3;
		unsigned int stencil_enable:1;
	} cc0;


	struct
	{
		unsigned int bf_stencil_ref:8;
		unsigned int stencil_write_mask:8;
		unsigned int stencil_test_mask:8;
		unsigned int stencil_ref:8;
	} cc1;


	struct
	{
		unsigned int logicop_enable:1;
		unsigned int pad0:10;
		unsigned int depth_write_enable:1;
		unsigned int depth_test_function:3;
		unsigned int depth_test:1;
		unsigned int bf_stencil_write_mask:8;
		unsigned int bf_stencil_test_mask:8;
	} cc2;


	struct
	{
		unsigned int pad0:8;
		unsigned int alpha_test_func:3;
		unsigned int alpha_test:1;
		unsigned int blend_enable:1;
		unsigned int ia_blend_enable:1;	/* independent alpha blend */
		unsigned int pad1:1;
		unsigned int alpha_test_format:1;
		unsigned int pad2:16;
	} cc3;

	struct
	{
		unsigned int pad0:5;
		unsigned int cc_viewport_state_offset:27;
	} cc4;

	struct
	{
		unsigned int pad0:2;
		unsigned int ia_dest_blend_factor:5;
		unsigned int ia_src_blend_factor:5;
		unsigned int ia_blend_function:3;
		unsigned int statistics_enable:1;
		unsigned int logicop_func:4;
		unsigned int pad1:11;
		unsigned int dither_enable:1;
	} cc5;

	struct
	{
		unsigned int clamp_post_alpha_blend:1;
		unsigned int clamp_pre_alpha_blend:1;
		unsigned int clamp_range:2;
		unsigned int pad0:11;
		unsigned int y_dither_offset:2;
		unsigned int x_dither_offset:2;
		unsigned int dest_blend_factor:5;
		unsigned int src_blend_factor:5;
		unsigned int blend_function:3;
	} cc6;

	struct {
		/* Alpha reference value: float or 4 x ubyte depending on
		 * alpha_test_format in cc3. */
		union {
			float f;
			unsigned char ub[4];
		} alpha_ref;
	} cc7;
};



/* Strips-and-fans (setup) unit state.  sf1 replaces the generic thread1
 * dword with an SF-specific layout. */
struct gen4_sf_unit_state
{
	struct thread0 thread0;
	struct {
		unsigned int pad0:7;
		unsigned int sw_exception_enable:1;
		unsigned int pad1:3;
		unsigned int mask_stack_exception_enable:1;
		unsigned int pad2:1;
		unsigned int illegal_op_exception_enable:1;
		unsigned int pad3:2;
		unsigned int floating_point_mode:1;
		unsigned int thread_priority:1;
		unsigned int binding_table_entry_count:8;
		unsigned int pad4:5;
		unsigned int single_program_flow:1;
	} sf1;

	struct thread2 thread2;
	struct thread3 thread3;

	struct
	{
		unsigned int pad0:10;
		unsigned int stats_enable:1;
		unsigned int nr_urb_entries:7;
		unsigned int pad1:1;
		unsigned int urb_entry_allocation_size:5;
		unsigned int pad2:1;
		unsigned int max_threads:6;
		unsigned int pad3:1;
	} thread4;

	struct
	{
		unsigned int front_winding:1;
		unsigned int viewport_transform:1;
		unsigned int pad0:3;
		unsigned int sf_viewport_state_offset:27;
	} sf5;

	struct
	{
		unsigned int pad0:9;
		unsigned int dest_org_vbias:4;
		unsigned int dest_org_hbias:4;
		unsigned int scissor:1;
		unsigned int disable_2x2_trifilter:1;
		unsigned int disable_zero_pix_trifilter:1;
		unsigned int point_rast_rule:2;
		unsigned int line_endcap_aa_region_width:2;
		unsigned int line_width:4;	/* U3.1 fixed point */
		unsigned int fast_scissor_disable:1;
		unsigned int cull_mode:2;
		unsigned int aa_enable:1;
	} sf6;

	struct
	{
		unsigned int point_size:11;	/* U8.3 fixed point */
		unsigned int use_point_size_state:1;
		unsigned int subpixel_precision:1;
		unsigned int sprite_point:1;
		unsigned int pad0:11;
		unsigned int trifan_pv:2;	/* provoking vertex selection */
		unsigned int linestrip_pv:2;
		unsigned int tristrip_pv:2;
		unsigned int line_last_pixel_enable:1;
	} sf7;

};


struct gen4_gs_unit_state
{
	struct thread0 thread0;
	struct thread1 thread1;
	struct thread2 thread2;
	struct thread3 thread3;

	struct
	{
		unsigned int pad0:10;
		unsigned int stats_enable:1;
		unsigned int nr_urb_entries:7;
		unsigned int pad1:1;
		unsigned int urb_entry_allocation_size:5;
		unsigned int pad2:1;
		unsigned int max_threads:1;
		unsigned int pad3:6;
	} thread4;

	struct
	{
		unsigned int sampler_count:3;
		unsigned int pad0:2;
		unsigned int sampler_state_pointer:27;
	} gs5;


	struct
	{
		unsigned int max_vp_index:4;
		unsigned int pad0:26;
		unsigned int reorder_enable:1;
		unsigned int pad1:1;
	} gs6;
};


struct gen4_vs_unit_state
{
	struct thread0 thread0;
	struct thread1 thread1;
	struct thread2 thread2;
	struct thread3 thread3;

	struct
	{
		unsigned int pad0:10;
		unsigned int stats_enable:1;
		unsigned int nr_urb_entries:7;
		unsigned int pad1:1;
		unsigned int urb_entry_allocation_size:5;
		unsigned int pad2:1;
		unsigned int max_threads:4;
		unsigned int pad3:3;
	} thread4;

	struct
	{
		unsigned int sampler_count:3;
		unsigned int pad0:2;
		unsigned int sampler_state_pointer:27;
	} vs5;

	struct
	{
		unsigned int vs_enable:1;
		unsigned int vert_cache_disable:1;
		unsigned int pad0:30;
	} vs6;
};


/* Windowizer/masker (pixel shader) unit state.  Up to three additional
 * kernel entry points (wm8..wm10) back the 8/16/32-pixel dispatch modes. */
struct gen4_wm_unit_state
{
	struct thread0 thread0;
	struct thread1 thread1;
	struct thread2 thread2;
	struct thread3 thread3;

	struct {
		unsigned int stats_enable:1;
		unsigned int pad0:1;
		unsigned int sampler_count:3;
		unsigned int sampler_state_pointer:27;
	} wm4;

	struct
	{
		unsigned int enable_8_pix:1;
		unsigned int enable_16_pix:1;
		unsigned int enable_32_pix:1;
		unsigned int pad0:7;
		unsigned int legacy_global_depth_bias:1;
		unsigned int line_stipple:1;
		unsigned int depth_offset:1;
		unsigned int polygon_stipple:1;
		unsigned int line_aa_region_width:2;
		unsigned int line_endcap_aa_region_width:2;
		unsigned int early_depth_test:1;
		unsigned int thread_dispatch_enable:1;
		unsigned int program_uses_depth:1;
		unsigned int program_computes_depth:1;
		unsigned int program_uses_killpixel:1;
		unsigned int legacy_line_rast: 1;
		unsigned int transposed_urb_read:1;
		unsigned int max_threads:7;
	} wm5;

	float global_depth_offset_constant;
	float global_depth_offset_scale;

	struct {
		unsigned int pad0:1;
		unsigned int grf_reg_count_1:3;
		unsigned int pad1:2;
		unsigned int kernel_start_pointer_1:26;
	} wm8;

	struct {
		unsigned int pad0:1;
		unsigned int grf_reg_count_2:3;
		unsigned int pad1:2;
		unsigned int kernel_start_pointer_2:26;
	} wm9;

	struct {
		unsigned int pad0:1;
		unsigned int grf_reg_count_3:3;
		unsigned int pad1:2;
		unsigned int kernel_start_pointer_3:26;
	} wm10;
};

/* WM state padded out to the 64-byte alignment the hardware requires. */
struct gen4_wm_unit_state_padded {
	struct gen4_wm_unit_state state;
	char pad[64 - sizeof(struct gen4_wm_unit_state)];
};

/* The hardware supports two different modes for border color. The
 * default (OpenGL) mode uses floating-point color channels, while the
 * legacy mode uses 4 bytes.
 *
 * More significantly, the legacy mode respects the components of the
 * border color for channels not present in the source, (whereas the
 * default mode will ignore the border color's alpha channel and use
 * alpha==1 for an RGB source, for example).
 *
 * The legacy mode matches the semantics specified by the Render
 * extension.
 */
struct gen4_sampler_default_border_color {
	float color[4];
};

struct gen4_sampler_legacy_border_color {
	uint8_t color[4];
};

struct gen4_sampler_state
{

	struct
	{
		unsigned int shadow_function:3;
		unsigned int lod_bias:11;	/* S4.6 fixed point */
		unsigned int min_filter:3;
		unsigned int mag_filter:3;
		unsigned int mip_filter:2;
		unsigned int base_level:5;
		unsigned int pad:1;
		unsigned int lod_preclamp:1;
		unsigned int border_color_mode:1;	/* default (float) vs legacy (ubyte) */
		unsigned int pad0:1;
		unsigned int disable:1;
	} ss0;

	struct
	{
		unsigned int r_wrap_mode:3;
		unsigned int t_wrap_mode:3;
		unsigned int s_wrap_mode:3;
		unsigned int pad:3;
		unsigned int max_lod:10;
		unsigned int min_lod:10;
	} ss1;


	struct
	{
		unsigned int pad:5;
		unsigned int border_color_pointer:27;
	} ss2;

	struct
	{
		unsigned int pad:19;
		unsigned int max_aniso:3;
		unsigned int chroma_key_mode:1;
		unsigned int chroma_key_index:2;
		unsigned int chroma_key_enable:1;
		unsigned int monochrome_filter_width:3;
		unsigned int monochrome_filter_height:3;
	} ss3;
};


struct gen4_clipper_viewport
{
	float xmin;
	float xmax;
	float ymin;
	float ymax;
};

struct gen4_cc_viewport
{
	float min_depth;
	float max_depth;
};

/* SF viewport: the m* fields are the non-trivial entries of the
 * viewport transform matrix (scale on the diagonal, translate in row 3). */
struct gen4_sf_viewport
{
	struct {
		float m00;
		float m11;
		float m22;
		float m30;
		float m31;
		float m32;
	} viewport;

	struct {
		short xmin;
		short ymin;
		short xmax;
		short ymax;
	} scissor;
};

/* Documented in the subsystem/shared-functions/sampler chapter...
 */
struct gen4_surface_state
{
	struct {
		unsigned int cube_pos_z:1;
		unsigned int cube_neg_z:1;
		unsigned int cube_pos_y:1;
		unsigned int cube_neg_y:1;
		unsigned int cube_pos_x:1;
		unsigned int cube_neg_x:1;
		unsigned int pad:3;
		unsigned int render_cache_read_mode:1;
		unsigned int mipmap_layout_mode:1;
		unsigned int vert_line_stride_ofs:1;
		unsigned int vert_line_stride:1;
		unsigned int color_blend:1;
		unsigned int writedisable_blue:1;
		unsigned int writedisable_green:1;
		unsigned int writedisable_red:1;
		unsigned int writedisable_alpha:1;
		unsigned int surface_format:9;
		unsigned int data_return_format:1;
		unsigned int pad0:1;
		unsigned int surface_type:3;
	} ss0;

	struct {
		unsigned int base_addr;
	} ss1;

	struct {
		unsigned int render_target_rotation:2;
		unsigned int mip_count:4;
		unsigned int width:13;	/* width/height/depth are stored minus one */
		unsigned int height:13;
	} ss2;

	struct {
		unsigned int tile_walk:1;
		unsigned int tiled_surface:1;
		unsigned int pad:1;
		unsigned int pitch:18;
		unsigned int depth:11;
	} ss3;

	struct {
		unsigned int pad:19;
		unsigned int min_array_elt:9;
		unsigned int min_lod:4;
	} ss4;

	struct {
		unsigned int pad:20;
		unsigned int y_offset:4;
		unsigned int pad2:1;
		unsigned int x_offset:7;
	} ss5;
};



struct gen4_vertex_buffer_state
{
	struct {
		unsigned int pitch:11;
		unsigned int pad:15;
		unsigned int access_type:1;	/* vertexdata vs instancedata */
		unsigned int vb_index:5;
	} vb0;

	unsigned int start_addr;
	unsigned int max_index;
#if 1
	unsigned int instance_data_step_rate; /* not included for sequential/random vertices? */
#endif
};

#define GEN4_VBP_MAX 17

struct gen4_vb_array_state {
	struct header header;
	struct gen4_vertex_buffer_state vb[GEN4_VBP_MAX];
};


struct gen4_vertex_element_state
{
	struct
	{
		unsigned int src_offset:11;
		unsigned int pad:5;
		unsigned int src_format:9;
		unsigned int pad0:1;
		unsigned int valid:1;
		unsigned int vertex_buffer_index:5;
	} ve0;

	struct
	{
		unsigned int dst_offset:8;
		unsigned int pad:8;
		/* Per-component store controls (store source / 0.0 / 1.0 / VID...). */
		unsigned int vfcomponent3:4;
		unsigned int vfcomponent2:4;
		unsigned int vfcomponent1:4;
		unsigned int vfcomponent0:4;
	} ve1;
};

#define GEN4_VEP_MAX 18

struct gen4_vertex_element_packet {
	struct header header;
	struct gen4_vertex_element_state ve[GEN4_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */
};


struct gen4_urb_immediate {
	unsigned int opcode:4;
	unsigned int offset:6;
	unsigned int swizzle_control:2;
	unsigned int pad:1;
	unsigned int allocate:1;
	unsigned int used:1;
	unsigned int complete:1;
	unsigned int response_length:4;
	unsigned int msg_length:4;
	unsigned int msg_target:4;
	unsigned int pad1:3;
	unsigned int end_of_thread:1;
};

/* Instruction format for the execution units:
 *
 * Four dwords per instruction.  bits1..bits3 are unions over the
 * direct/indirect (daN/iaN) and align1/align16 addressing variants;
 * bits3 additionally overlays the per-shared-function send-message
 * descriptors (math, sampler, urb, dataport) and flow control.
 */

struct gen4_instruction
{
	struct
	{
		unsigned int opcode:7;
		unsigned int pad:1;
		unsigned int access_mode:1;
		unsigned int mask_control:1;
		unsigned int dependency_control:2;
		unsigned int compression_control:2;
		unsigned int thread_control:2;
		unsigned int predicate_control:4;
		unsigned int predicate_inverse:1;
		unsigned int execution_size:3;
		/* sic: "conditonalmod" misspelling is long-standing in this header */
		unsigned int destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */
		unsigned int pad0:2;
		unsigned int debug_control:1;
		unsigned int saturate:1;
	} header;

	union {
		struct
		{
			unsigned int dest_reg_file:2;
			unsigned int dest_reg_type:3;
			unsigned int src0_reg_file:2;
			unsigned int src0_reg_type:3;
			unsigned int src1_reg_file:2;
			unsigned int src1_reg_type:3;
			unsigned int pad:1;
			unsigned int dest_subreg_nr:5;
			unsigned int dest_reg_nr:8;
			unsigned int dest_horiz_stride:2;
			unsigned int dest_address_mode:1;
		} da1;

		struct
		{
			unsigned int dest_reg_file:2;
			unsigned int dest_reg_type:3;
			unsigned int src0_reg_file:2;
			unsigned int src0_reg_type:3;
			unsigned int pad:6;
			int dest_indirect_offset:10;	/* offset against the deref'd address reg */
			unsigned int dest_subreg_nr:3; /* subnr for the address reg a0.x */
			unsigned int dest_horiz_stride:2;
			unsigned int dest_address_mode:1;
		} ia1;

		struct
		{
			unsigned int dest_reg_file:2;
			unsigned int dest_reg_type:3;
			unsigned int src0_reg_file:2;
			unsigned int src0_reg_type:3;
			unsigned int src1_reg_file:2;
			unsigned int src1_reg_type:3;
			unsigned int pad0:1;
			unsigned int dest_writemask:4;
			unsigned int dest_subreg_nr:1;
			unsigned int dest_reg_nr:8;
			unsigned int pad1:2;
			unsigned int dest_address_mode:1;
		} da16;

		struct
		{
			unsigned int dest_reg_file:2;
			unsigned int dest_reg_type:3;
			unsigned int src0_reg_file:2;
			unsigned int src0_reg_type:3;
			unsigned int pad0:6;
			unsigned int dest_writemask:4;
			int dest_indirect_offset:6;
			unsigned int dest_subreg_nr:3;
			unsigned int pad1:2;
			unsigned int dest_address_mode:1;
		} ia16;
	} bits1;


	union {
		struct
		{
			unsigned int src0_subreg_nr:5;
			unsigned int src0_reg_nr:8;
			unsigned int src0_abs:1;
			unsigned int src0_negate:1;
			unsigned int src0_address_mode:1;
			unsigned int src0_horiz_stride:2;
			unsigned int src0_width:3;
			unsigned int src0_vert_stride:4;
			unsigned int flag_reg_nr:1;
			unsigned int pad:6;
		} da1;

		struct
		{
			int src0_indirect_offset:10;
			unsigned int src0_subreg_nr:3;
			unsigned int src0_abs:1;
			unsigned int src0_negate:1;
			unsigned int src0_address_mode:1;
			unsigned int src0_horiz_stride:2;
			unsigned int src0_width:3;
			unsigned int src0_vert_stride:4;
			unsigned int flag_reg_nr:1;
			unsigned int pad:6;
		} ia1;

		struct
		{
			unsigned int src0_swz_x:2;
			unsigned int src0_swz_y:2;
			unsigned int src0_subreg_nr:1;
			unsigned int src0_reg_nr:8;
			unsigned int src0_abs:1;
			unsigned int src0_negate:1;
			unsigned int src0_address_mode:1;
			unsigned int src0_swz_z:2;
			unsigned int src0_swz_w:2;
			unsigned int pad0:1;
			unsigned int src0_vert_stride:4;
			unsigned int flag_reg_nr:1;
			unsigned int pad1:6;
		} da16;

		struct
		{
			unsigned int src0_swz_x:2;
			unsigned int src0_swz_y:2;
			int src0_indirect_offset:6;
			unsigned int src0_subreg_nr:3;
			unsigned int src0_abs:1;
			unsigned int src0_negate:1;
			unsigned int src0_address_mode:1;
			unsigned int src0_swz_z:2;
			unsigned int src0_swz_w:2;
			unsigned int pad0:1;
			unsigned int src0_vert_stride:4;
			unsigned int flag_reg_nr:1;
			unsigned int pad1:6;
		} ia16;

	} bits2;

	union
	{
		struct
		{
			unsigned int src1_subreg_nr:5;
			unsigned int src1_reg_nr:8;
			unsigned int src1_abs:1;
			unsigned int src1_negate:1;
			unsigned int pad:1;
			unsigned int src1_horiz_stride:2;
			unsigned int src1_width:3;
			unsigned int src1_vert_stride:4;
			unsigned int pad0:7;
		} da1;

		struct
		{
			unsigned int src1_swz_x:2;
			unsigned int src1_swz_y:2;
			unsigned int src1_subreg_nr:1;
			unsigned int src1_reg_nr:8;
			unsigned int src1_abs:1;
			unsigned int src1_negate:1;
			unsigned int pad0:1;
			unsigned int src1_swz_z:2;
			unsigned int src1_swz_w:2;
			unsigned int pad1:1;
			unsigned int src1_vert_stride:4;
			unsigned int pad2:7;
		} da16;

		struct
		{
			int src1_indirect_offset:10;
			unsigned int src1_subreg_nr:3;
			unsigned int src1_abs:1;
			unsigned int src1_negate:1;
			unsigned int pad0:1;
			unsigned int src1_horiz_stride:2;
			unsigned int src1_width:3;
			unsigned int src1_vert_stride:4;
			unsigned int flag_reg_nr:1;
			unsigned int pad1:6;
		} ia1;

		struct
		{
			unsigned int src1_swz_x:2;
			unsigned int src1_swz_y:2;
			int src1_indirect_offset:6;
			unsigned int src1_subreg_nr:3;
			unsigned int src1_abs:1;
			unsigned int src1_negate:1;
			unsigned int pad0:1;
			unsigned int src1_swz_z:2;
			unsigned int src1_swz_w:2;
			unsigned int pad1:1;
			unsigned int src1_vert_stride:4;
			unsigned int flag_reg_nr:1;
			unsigned int pad2:6;
		} ia16;


		struct
		{
			int jump_count:16;	/* note: signed */
			unsigned int pop_count:4;
			unsigned int pad0:12;
		} if_else;

		struct {
			unsigned int function:4;	/* GEN4_MATH_FUNCTION_* */
			unsigned int int_type:1;
			unsigned int precision:1;
			unsigned int saturate:1;
			unsigned int data_type:1;
			unsigned int pad0:8;
			unsigned int response_length:4;
			unsigned int msg_length:4;
			unsigned int msg_target:4;
			unsigned int pad1:3;
			unsigned int end_of_thread:1;
		} math;

		struct {
			unsigned int binding_table_index:8;
			unsigned int sampler:4;
			unsigned int return_format:2;
			unsigned int msg_type:2;
			unsigned int response_length:4;
			unsigned int msg_length:4;
			unsigned int msg_target:4;
			unsigned int pad1:3;
			unsigned int end_of_thread:1;
		} sampler;

		struct gen4_urb_immediate urb;

		struct {
			unsigned int binding_table_index:8;
			unsigned int msg_control:4;
			unsigned int msg_type:2;
			unsigned int target_cache:2;
			unsigned int response_length:4;
			unsigned int msg_length:4;
			unsigned int msg_target:4;
			unsigned int pad1:3;
			unsigned int end_of_thread:1;
		} dp_read;

		struct {
			unsigned int binding_table_index:8;
			unsigned int msg_control:3;
			unsigned int pixel_scoreboard_clear:1;
			unsigned int msg_type:3;
			unsigned int send_commit_msg:1;
			unsigned int response_length:4;
			unsigned int msg_length:4;
			unsigned int msg_target:4;
			unsigned int pad1:3;
			unsigned int end_of_thread:1;
		} dp_write;

		struct {
			unsigned int pad:16;
			unsigned int response_length:4;
			unsigned int msg_length:4;
			unsigned int msg_target:4;
			unsigned int pad1:3;
			unsigned int end_of_thread:1;
		} generic;

		unsigned int ud;	/* raw dword view */
	} bits3;
};

/* media pipeline */

struct gen4_vfe_state {
	struct {
		unsigned int per_thread_scratch_space:4;
		unsigned int pad3:3;
		unsigned int extend_vfe_state_present:1;
		unsigned int pad2:2;
		unsigned int scratch_base:22;
	} vfe0;

	struct {
		unsigned int debug_counter_control:2;	/* GEN4_VFE_DEBUG_COUNTER_* */
		unsigned int children_present:1;
		unsigned int vfe_mode:4;	/* GEN4_VFE_MODE_* */
		unsigned int pad2:2;
		unsigned int num_urb_entries:7;
		unsigned int urb_entry_alloc_size:9;
		unsigned int max_threads:7;
	} vfe1;

	struct {
		unsigned int pad4:4;
		unsigned int interface_descriptor_base:28;
	} vfe2;
};

/* MPEG-2 VLD state for the media pipeline's VLD mode. */
struct gen4_vld_state {
	struct {
		unsigned int pad6:6;
		unsigned int scan_order:1;
		unsigned int intra_vlc_format:1;
		unsigned int quantizer_scale_type:1;
		unsigned int concealment_motion_vector:1;
		unsigned int frame_predict_frame_dct:1;
		unsigned int top_field_first:1;
		unsigned int picture_structure:2;	/* GEN4_MPEG_{TOP,BOTTOM}_FIELD / FRAME */
		unsigned int intra_dc_precision:2;
		unsigned int f_code_0_0:4;	/* MPEG-2 motion vector f_codes */
		unsigned int f_code_0_1:4;
		unsigned int f_code_1_0:4;
		unsigned int f_code_1_1:4;
	} vld0;

	struct {
		unsigned int pad2:9;
		unsigned int picture_coding_type:2;	/* GEN4_MPEG_[IPB]_PICTURE */
		unsigned int pad:21;
	} vld1;

	struct {
		unsigned int index_0:4;
		unsigned int index_1:4;
		unsigned int index_2:4;
		unsigned int index_3:4;
		unsigned int index_4:4;
		unsigned int index_5:4;
		unsigned int index_6:4;
		unsigned int index_7:4;
	} desc_remap_table0;

	struct {
		unsigned int index_8:4;
		unsigned int index_9:4;
		unsigned int index_10:4;
		unsigned int index_11:4;
		unsigned int index_12:4;
		unsigned int index_13:4;
		unsigned int index_14:4;
		unsigned int index_15:4;
	} desc_remap_table1;
};

/* Media interface descriptor: per-kernel thread setup for VFE dispatch. */
struct gen4_interface_descriptor {
	struct {
		unsigned int grf_reg_blocks:4;
		unsigned int pad:2;
		unsigned int kernel_start_pointer:26;
	} desc0;

	struct {
		unsigned int pad:7;
		unsigned int software_exception:1;
		unsigned int pad2:3;
		unsigned int maskstack_exception:1;
		unsigned int pad3:1;
		unsigned int illegal_opcode_exception:1;
		unsigned int pad4:2;
		unsigned int floating_point_mode:1;
		unsigned int thread_priority:1;
		unsigned int single_program_flow:1;
		unsigned int
pad5:1; + unsigned int const_urb_entry_read_offset:6; + unsigned int const_urb_entry_read_len:6; + } desc1; + + struct { + unsigned int pad:2; + unsigned int sampler_count:3; + unsigned int sampler_state_pointer:27; + } desc2; + + struct { + unsigned int binding_table_entry_count:5; + unsigned int binding_table_pointer:27; + } desc3; +}; + +struct gen6_blend_state +{ + struct { + unsigned int dest_blend_factor:5; + unsigned int source_blend_factor:5; + unsigned int pad3:1; + unsigned int blend_func:3; + unsigned int pad2:1; + unsigned int ia_dest_blend_factor:5; + unsigned int ia_source_blend_factor:5; + unsigned int pad1:1; + unsigned int ia_blend_func:3; + unsigned int pad0:1; + unsigned int ia_blend_enable:1; + unsigned int blend_enable:1; + } blend0; + + struct { + unsigned int post_blend_clamp_enable:1; + unsigned int pre_blend_clamp_enable:1; + unsigned int clamp_range:2; + unsigned int pad0:4; + unsigned int x_dither_offset:2; + unsigned int y_dither_offset:2; + unsigned int dither_enable:1; + unsigned int alpha_test_func:3; + unsigned int alpha_test_enable:1; + unsigned int pad1:1; + unsigned int logic_op_func:4; + unsigned int logic_op_enable:1; + unsigned int pad2:1; + unsigned int write_disable_b:1; + unsigned int write_disable_g:1; + unsigned int write_disable_r:1; + unsigned int write_disable_a:1; + unsigned int pad3:1; + unsigned int alpha_to_coverage_dither:1; + unsigned int alpha_to_one:1; + unsigned int alpha_to_coverage:1; + } blend1; +}; + +struct gen6_color_calc_state +{ + struct { + unsigned int alpha_test_format:1; + unsigned int pad0:14; + unsigned int round_disable:1; + unsigned int bf_stencil_ref:8; + unsigned int stencil_ref:8; + } cc0; + + union { + float alpha_ref_f; + struct { + unsigned int ui:8; + unsigned int pad0:24; + } alpha_ref_fi; + } cc1; + + float constant_r; + float constant_g; + float constant_b; + float constant_a; +}; + +struct gen6_depth_stencil_state +{ + struct { + unsigned int pad0:3; + unsigned int 
bf_stencil_pass_depth_pass_op:3; + unsigned int bf_stencil_pass_depth_fail_op:3; + unsigned int bf_stencil_fail_op:3; + unsigned int bf_stencil_func:3; + unsigned int bf_stencil_enable:1; + unsigned int pad1:2; + unsigned int stencil_write_enable:1; + unsigned int stencil_pass_depth_pass_op:3; + unsigned int stencil_pass_depth_fail_op:3; + unsigned int stencil_fail_op:3; + unsigned int stencil_func:3; + unsigned int stencil_enable:1; + } ds0; + + struct { + unsigned int bf_stencil_write_mask:8; + unsigned int bf_stencil_test_mask:8; + unsigned int stencil_write_mask:8; + unsigned int stencil_test_mask:8; + } ds1; + + struct { + unsigned int pad0:26; + unsigned int depth_write_enable:1; + unsigned int depth_test_func:3; + unsigned int pad1:1; + unsigned int depth_test_enable:1; + } ds2; +}; + +typedef enum { + SAMPLER_FILTER_NEAREST = 0, + SAMPLER_FILTER_BILINEAR, + FILTER_COUNT +} sampler_filter_t; + +typedef enum { + SAMPLER_EXTEND_NONE = 0, + SAMPLER_EXTEND_REPEAT, + SAMPLER_EXTEND_PAD, + SAMPLER_EXTEND_REFLECT, + EXTEND_COUNT +} sampler_extend_t; + +typedef enum { + WM_KERNEL = 0, + WM_KERNEL_PROJECTIVE, + + WM_KERNEL_MASK, + WM_KERNEL_MASK_PROJECTIVE, + + WM_KERNEL_MASKCA, + WM_KERNEL_MASKCA_PROJECTIVE, + + WM_KERNEL_MASKCA_SRCALPHA, + WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + + WM_KERNEL_VIDEO_PLANAR, + WM_KERNEL_VIDEO_PACKED, + KERNEL_COUNT +} wm_kernel_t; + +#endif diff --git a/src/sna/gen5_render.c b/src/sna/gen5_render.c new file mode 100644 index 00000000..72afe98d --- /dev/null +++ b/src/sna/gen5_render.c @@ -0,0 +1,2841 @@ +/* + * Copyright © 2006,2008,2011 Intel Corporation + * Copyright © 2007 Red Hat, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@sna.com> + * Eric Anholt <eric@anholt.net> + * Carl Worth <cworth@redhat.com> + * Keith Packard <keithp@keithp.com> + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <xf86.h> + +#include "sna.h" +#include "sna_reg.h" +#include "sna_render.h" +#include "sna_render_inline.h" +#include "sna_video.h" + +#include "gen5_render.h" + +#if DEBUG_RENDER +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +#define GEN5_GRF_BLOCKS(nreg) ((nreg + 15) / 16 - 1) + +/* Set up a default static partitioning of the URB, which is supposed to + * allow anything we would want to do, at potentially lower performance. 
+ */ +#define URB_CS_ENTRY_SIZE 1 +#define URB_CS_ENTRIES 0 + +#define URB_VS_ENTRY_SIZE 1 // each 512-bit row +#define URB_VS_ENTRIES 8 // we needs at least 8 entries + +#define URB_GS_ENTRY_SIZE 0 +#define URB_GS_ENTRIES 0 + +#define URB_CLIP_ENTRY_SIZE 0 +#define URB_CLIP_ENTRIES 0 + +#define URB_SF_ENTRY_SIZE 2 +#define URB_SF_ENTRIES 1 + +/* + * this program computes dA/dx and dA/dy for the texture coordinates along + * with the base texture coordinate. It was extracted from the Mesa driver + */ + +#define SF_KERNEL_NUM_GRF 16 +#define SF_MAX_THREADS 2 + +#define PS_KERNEL_NUM_GRF 32 +#define PS_MAX_THREADS 48 + +static const uint32_t sf_kernel[][4] = { +#include "exa_sf.g4b.gen5" +}; + +static const uint32_t sf_kernel_mask[][4] = { +#include "exa_sf_mask.g4b.gen5" +}; + +static const uint32_t ps_kernel_nomask_affine[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_affine.g4b.gen5" +#include "exa_wm_src_sample_argb.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_nomask_projective[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_projective.g4b.gen5" +#include "exa_wm_src_sample_argb.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_maskca_affine[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_affine.g4b.gen5" +#include "exa_wm_src_sample_argb.g4b.gen5" +#include "exa_wm_mask_affine.g4b.gen5" +#include "exa_wm_mask_sample_argb.g4b.gen5" +#include "exa_wm_ca.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_maskca_projective[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_projective.g4b.gen5" +#include "exa_wm_src_sample_argb.g4b.gen5" +#include "exa_wm_mask_projective.g4b.gen5" +#include "exa_wm_mask_sample_argb.g4b.gen5" +#include "exa_wm_ca.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include 
"exa_wm_src_affine.g4b.gen5" +#include "exa_wm_src_sample_a.g4b.gen5" +#include "exa_wm_mask_affine.g4b.gen5" +#include "exa_wm_mask_sample_argb.g4b.gen5" +#include "exa_wm_ca_srcalpha.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_projective.g4b.gen5" +#include "exa_wm_src_sample_a.g4b.gen5" +#include "exa_wm_mask_projective.g4b.gen5" +#include "exa_wm_mask_sample_argb.g4b.gen5" +#include "exa_wm_ca_srcalpha.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_masknoca_affine[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_affine.g4b.gen5" +#include "exa_wm_src_sample_argb.g4b.gen5" +#include "exa_wm_mask_affine.g4b.gen5" +#include "exa_wm_mask_sample_a.g4b.gen5" +#include "exa_wm_noca.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_masknoca_projective[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_projective.g4b.gen5" +#include "exa_wm_src_sample_argb.g4b.gen5" +#include "exa_wm_mask_projective.g4b.gen5" +#include "exa_wm_mask_sample_a.g4b.gen5" +#include "exa_wm_noca.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_packed_static[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_affine.g4b.gen5" +#include "exa_wm_src_sample_argb.g4b.gen5" +#include "exa_wm_yuv_rgb.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +static const uint32_t ps_kernel_planar_static[][4] = { +#include "exa_wm_xy.g4b.gen5" +#include "exa_wm_src_affine.g4b.gen5" +#include "exa_wm_src_sample_planar.g4b.gen5" +#include "exa_wm_yuv_rgb.g4b.gen5" +#include "exa_wm_write.g4b.gen5" +}; + +#define KERNEL(kernel_enum, kernel, masked) \ + [kernel_enum] = {&kernel, sizeof(kernel), masked} +static const struct wm_kernel_info { + const void *data; + unsigned int size; + Bool has_mask; +} wm_kernels[] = { + KERNEL(WM_KERNEL, 
ps_kernel_nomask_affine, FALSE), + KERNEL(WM_KERNEL_PROJECTIVE, ps_kernel_nomask_projective, FALSE), + + KERNEL(WM_KERNEL_MASK, ps_kernel_masknoca_affine, TRUE), + KERNEL(WM_KERNEL_MASK_PROJECTIVE, ps_kernel_masknoca_projective, TRUE), + + KERNEL(WM_KERNEL_MASKCA, ps_kernel_maskca_affine, TRUE), + KERNEL(WM_KERNEL_MASKCA_PROJECTIVE, ps_kernel_maskca_projective, TRUE), + + KERNEL(WM_KERNEL_MASKCA_SRCALPHA, + ps_kernel_maskca_srcalpha_affine, TRUE), + KERNEL(WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + ps_kernel_maskca_srcalpha_projective, TRUE), + + KERNEL(WM_KERNEL_VIDEO_PLANAR, ps_kernel_planar_static, FALSE), + KERNEL(WM_KERNEL_VIDEO_PACKED, ps_kernel_packed_static, FALSE), +}; +#undef KERNEL + +static const struct blendinfo { + Bool src_alpha; + uint32_t src_blend; + uint32_t dst_blend; +} gen5_blend_op[] = { + /* Clear */ {0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ZERO}, + /* Src */ {0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ZERO}, + /* Dst */ {0, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_ONE}, + /* Over */ {1, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_INV_SRC_ALPHA}, + /* OverReverse */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ONE}, + /* In */ {0, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_ZERO}, + /* InReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_SRC_ALPHA}, + /* Out */ {0, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_ZERO}, + /* OutReverse */ {1, GEN5_BLENDFACTOR_ZERO, GEN5_BLENDFACTOR_INV_SRC_ALPHA}, + /* Atop */ {1, GEN5_BLENDFACTOR_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA}, + /* AtopReverse */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_SRC_ALPHA}, + /* Xor */ {1, GEN5_BLENDFACTOR_INV_DST_ALPHA, GEN5_BLENDFACTOR_INV_SRC_ALPHA}, + /* Add */ {0, GEN5_BLENDFACTOR_ONE, GEN5_BLENDFACTOR_ONE}, +}; + +/** + * Highest-valued BLENDFACTOR used in gen5_blend_op. 
+ * + * This leaves out GEN5_BLENDFACTOR_INV_DST_COLOR, + * GEN5_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, + * GEN5_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} + */ +#define GEN5_BLENDFACTOR_COUNT (GEN5_BLENDFACTOR_INV_DST_ALPHA + 1) + +/* FIXME: surface format defined in gen5_defines.h, shared Sampling engine + * 1.7.2 + */ +static const struct formatinfo { + CARD32 pict_fmt; + uint32_t card_fmt; +} gen5_tex_formats[] = { + {PICT_a8, GEN5_SURFACEFORMAT_A8_UNORM}, + {PICT_a8r8g8b8, GEN5_SURFACEFORMAT_B8G8R8A8_UNORM}, + {PICT_x8r8g8b8, GEN5_SURFACEFORMAT_B8G8R8X8_UNORM}, + {PICT_a8b8g8r8, GEN5_SURFACEFORMAT_R8G8B8A8_UNORM}, + {PICT_x8b8g8r8, GEN5_SURFACEFORMAT_R8G8B8X8_UNORM}, + {PICT_r8g8b8, GEN5_SURFACEFORMAT_R8G8B8_UNORM}, + {PICT_r5g6b5, GEN5_SURFACEFORMAT_B5G6R5_UNORM}, + {PICT_a1r5g5b5, GEN5_SURFACEFORMAT_B5G5R5A1_UNORM}, + {PICT_a2r10g10b10, GEN5_SURFACEFORMAT_B10G10R10A2_UNORM}, + {PICT_x2r10g10b10, GEN5_SURFACEFORMAT_B10G10R10X2_UNORM}, + {PICT_a2b10g10r10, GEN5_SURFACEFORMAT_R10G10B10A2_UNORM}, + {PICT_x2r10g10b10, GEN5_SURFACEFORMAT_B10G10R10X2_UNORM}, + {PICT_a4r4g4b4, GEN5_SURFACEFORMAT_B4G4R4A4_UNORM}, +}; + +#define BLEND_OFFSET(s, d) \ + (((s) * GEN5_BLENDFACTOR_COUNT + (d)) * 64) + +#define SAMPLER_OFFSET(sf, se, mf, me, k) \ + ((((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * KERNEL_COUNT + (k)) * 64) + +static bool +gen5_emit_pipelined_pointers(struct sna *sna, + const struct sna_composite_op *op, + int blend, int kernel); + +#define OUT_BATCH(v) batch_emit(sna, v) +#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) +#define OUT_VERTEX_F(v) vertex_emit(sna, v) + +static int +gen5_choose_composite_kernel(int op, Bool has_mask, Bool is_ca, Bool is_affine) +{ + int base; + + if (has_mask) { + if (is_ca) { + if (gen5_blend_op[op].src_alpha) + base = WM_KERNEL_MASKCA_SRCALPHA; + else + base = WM_KERNEL_MASKCA; + } else + base = WM_KERNEL_MASK; + } else + base = WM_KERNEL; + + return base + !is_affine; +} + +static void 
gen5_magic_ca_pass(struct sna *sna, + const struct sna_composite_op *op) +{ + struct gen5_render_state *state = &sna->render_state.gen5; + + if (!op->need_magic_ca_pass) + return; + + DBG(("%s: CA fixup\n", __FUNCTION__)); + + gen5_emit_pipelined_pointers + (sna, op, PictOpAdd, + gen5_choose_composite_kernel(PictOpAdd, + TRUE, TRUE, op->is_affine)); + + OUT_BATCH(GEN5_3DPRIMITIVE | + GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | + 4); + OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); + OUT_BATCH(sna->render.vertex_start); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + + state->last_primitive = sna->kgem.nbatch; +} + +static void gen5_vertex_flush(struct sna *sna) +{ + if (sna->render_state.gen5.vertex_offset == 0) + return; + + DBG(("%s[%x] = %d\n", __FUNCTION__, + 4*sna->render_state.gen5.vertex_offset, + sna->render.vertex_index - sna->render.vertex_start)); + sna->kgem.batch[sna->render_state.gen5.vertex_offset] = + sna->render.vertex_index - sna->render.vertex_start; + sna->render_state.gen5.vertex_offset = 0; + + if (sna->render.op) + gen5_magic_ca_pass(sna, sna->render.op); +} + +static void gen5_vertex_finish(struct sna *sna, Bool last) +{ + struct kgem_bo *bo; + int i, delta; + + gen5_vertex_flush(sna); + if (!sna->render.vertex_used) + return; + + /* Note: we only need dword alignment (currently) */ + + if (last && sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) { + DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, + sna->render.vertex_used, sna->kgem.nbatch)); + memcpy(sna->kgem.batch + sna->kgem.nbatch, + sna->render.vertex_data, + sna->render.vertex_used * 4); + delta = sna->kgem.nbatch * 4; + bo = NULL; + sna->kgem.nbatch += sna->render.vertex_used; + } else { + bo = kgem_create_linear(&sna->kgem, 4*sna->render.vertex_used); + if (bo && !kgem_bo_write(&sna->kgem, bo, + 
sna->render.vertex_data, + 4*sna->render.vertex_used)) { + kgem_bo_destroy(&sna->kgem, bo); + return; + } + delta = 0; + DBG(("%s: new vbo: %d\n", __FUNCTION__, + sna->render.vertex_used)); + } + + for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) { + if (sna->render.vertex_reloc[i]) { + DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, + i, sna->render.vertex_reloc[i])); + + sna->kgem.batch[sna->render.vertex_reloc[i]] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i], + bo, + I915_GEM_DOMAIN_VERTEX << 16, + delta); + sna->kgem.batch[sna->render.vertex_reloc[i]+1] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i]+1, + bo, + I915_GEM_DOMAIN_VERTEX << 16, + delta + sna->render.vertex_used * 4 - 1); + sna->render.vertex_reloc[i] = 0; + } + } + + if (bo) + kgem_bo_destroy(&sna->kgem, bo); + + sna->render.vertex_used = 0; + sna->render.vertex_index = 0; + sna->render_state.gen5.vb_id = 0; +} + +static uint32_t gen5_get_blend(int op, + Bool has_component_alpha, + uint32_t dst_format) +{ + uint32_t src, dst; + + src = gen5_blend_op[op].src_blend; + dst = gen5_blend_op[op].dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that we'll treat + * it as always 1. + */ + if (PICT_FORMAT_A(dst_format) == 0) { + if (src == GEN5_BLENDFACTOR_DST_ALPHA) + src = GEN5_BLENDFACTOR_ONE; + else if (src == GEN5_BLENDFACTOR_INV_DST_ALPHA) + src = GEN5_BLENDFACTOR_ZERO; + } + + /* If the source alpha is being used, then we should only be in a + * case where the source blend factor is 0, and the source blend + * value is the mask channels multiplied by the source picture's alpha. 
+ */ + if (has_component_alpha && gen5_blend_op[op].src_alpha) { + if (dst == GEN5_BLENDFACTOR_SRC_ALPHA) + dst = GEN5_BLENDFACTOR_SRC_COLOR; + else if (dst == GEN5_BLENDFACTOR_INV_SRC_ALPHA) + dst = GEN5_BLENDFACTOR_INV_SRC_COLOR; + } + + DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", + op, dst_format, PICT_FORMAT_A(dst_format), + src, dst, BLEND_OFFSET(src, dst))); + return BLEND_OFFSET(src, dst); +} + +static uint32_t gen5_get_dest_format(PictFormat format) +{ + switch (format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + default: + return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM; + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + return GEN5_SURFACEFORMAT_R8G8B8A8_UNORM; + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM; + case PICT_r5g6b5: + return GEN5_SURFACEFORMAT_B5G6R5_UNORM; + case PICT_x1r5g5b5: + case PICT_a1r5g5b5: + return GEN5_SURFACEFORMAT_B5G5R5A1_UNORM; + case PICT_a8: + return GEN5_SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return GEN5_SURFACEFORMAT_B4G4R4A4_UNORM; + } +} + +static Bool gen5_check_dst_format(PictFormat format) +{ + switch (format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + case PICT_r5g6b5: + case PICT_x1r5g5b5: + case PICT_a1r5g5b5: + case PICT_a8: + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return TRUE; + } + return FALSE; +} + +static uint32_t gen5_get_dest_format_for_depth(int depth) +{ + switch (depth) { + case 32: + case 24: + default: return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM; + case 30: return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM; + case 16: return GEN5_SURFACEFORMAT_B5G6R5_UNORM; + case 8: return GEN5_SURFACEFORMAT_A8_UNORM; + } +} + +static uint32_t gen5_get_card_format_for_depth(int depth) +{ + switch (depth) { + case 32: + default: return GEN5_SURFACEFORMAT_B8G8R8A8_UNORM; + case 30: return GEN5_SURFACEFORMAT_B10G10R10A2_UNORM; + case 
24: return GEN5_SURFACEFORMAT_B8G8R8X8_UNORM; + case 16: return GEN5_SURFACEFORMAT_B5G6R5_UNORM; + case 8: return GEN5_SURFACEFORMAT_A8_UNORM; + } +} + +static bool gen5_format_is_dst(uint32_t format) +{ + switch (format) { + case GEN5_SURFACEFORMAT_B8G8R8A8_UNORM: + case GEN5_SURFACEFORMAT_R8G8B8A8_UNORM: + case GEN5_SURFACEFORMAT_B10G10R10A2_UNORM: + case GEN5_SURFACEFORMAT_B5G6R5_UNORM: + case GEN5_SURFACEFORMAT_B5G5R5A1_UNORM: + case GEN5_SURFACEFORMAT_A8_UNORM: + case GEN5_SURFACEFORMAT_B4G4R4A4_UNORM: + return true; + default: + return false; + } +} + +typedef struct gen5_surface_state_padded { + struct gen5_surface_state state; + char pad[32 - sizeof(struct gen5_surface_state)]; +} gen5_surface_state_padded; + +static void null_create(struct sna_static_stream *stream) +{ + /* A bunch of zeros useful for legacy border color and depth-stencil */ + sna_static_stream_map(stream, 64, 64); +} + +static void +sampler_state_init(struct gen5_sampler_state *sampler_state, + sampler_filter_t filter, + sampler_extend_t extend) +{ + sampler_state->ss0.lod_preclamp = 1; /* GL mode */ + + /* We use the legacy mode to get the semantics specified by + * the Render extension. 
*/ + sampler_state->ss0.border_color_mode = GEN5_BORDER_COLOR_MODE_LEGACY; + + switch (filter) { + default: + case SAMPLER_FILTER_NEAREST: + sampler_state->ss0.min_filter = GEN5_MAPFILTER_NEAREST; + sampler_state->ss0.mag_filter = GEN5_MAPFILTER_NEAREST; + break; + case SAMPLER_FILTER_BILINEAR: + sampler_state->ss0.min_filter = GEN5_MAPFILTER_LINEAR; + sampler_state->ss0.mag_filter = GEN5_MAPFILTER_LINEAR; + break; + } + + switch (extend) { + default: + case SAMPLER_EXTEND_NONE: + sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP_BORDER; + break; + case SAMPLER_EXTEND_REPEAT: + sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_WRAP; + sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_WRAP; + sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_WRAP; + break; + case SAMPLER_EXTEND_PAD: + sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_CLAMP; + sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_CLAMP; + sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_CLAMP; + break; + case SAMPLER_EXTEND_REFLECT: + sampler_state->ss1.r_wrap_mode = GEN5_TEXCOORDMODE_MIRROR; + sampler_state->ss1.s_wrap_mode = GEN5_TEXCOORDMODE_MIRROR; + sampler_state->ss1.t_wrap_mode = GEN5_TEXCOORDMODE_MIRROR; + break; + } +} + +static uint32_t gen5_get_card_format(PictFormat format) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(gen5_tex_formats); i++) { + if (gen5_tex_formats[i].pict_fmt == format) + return gen5_tex_formats[i].card_fmt; + } + return -1; +} + +static uint32_t gen5_filter(uint32_t filter) +{ + switch (filter) { + default: + assert(0); + case PictFilterNearest: + return SAMPLER_FILTER_NEAREST; + case PictFilterBilinear: + return SAMPLER_FILTER_BILINEAR; + } +} + +static uint32_t gen5_check_filter(PicturePtr picture) +{ + switch (picture->filter) { + case PictFilterNearest: + case PictFilterBilinear: + return TRUE; + default: + return FALSE; + } 
+} + +static uint32_t gen5_repeat(uint32_t repeat) +{ + switch (repeat) { + default: + assert(0); + case RepeatNone: + return SAMPLER_EXTEND_NONE; + case RepeatNormal: + return SAMPLER_EXTEND_REPEAT; + case RepeatPad: + return SAMPLER_EXTEND_PAD; + case RepeatReflect: + return SAMPLER_EXTEND_REFLECT; + } +} + +static bool gen5_check_repeat(PicturePtr picture) +{ + if (!picture->repeat) + return TRUE; + + switch (picture->repeatType) { + case RepeatNone: + case RepeatNormal: + case RepeatPad: + case RepeatReflect: + return TRUE; + default: + return FALSE; + } +} + +/** + * Sets up the common fields for a surface state buffer for the given + * picture in the given surface state buffer. + */ +static int +gen5_bind_bo(struct sna *sna, + struct kgem_bo *bo, + uint32_t width, + uint32_t height, + uint32_t format, + Bool is_dst) +{ + struct gen5_surface_state *ss; + uint32_t domains; + uint16_t offset; + + /* After the first bind, we manage the cache domains within the batch */ + if (is_dst) { + domains = I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER; + kgem_bo_mark_dirty(bo); + } else { + domains = I915_GEM_DOMAIN_SAMPLER << 16; + is_dst = gen5_format_is_dst(format); + } + + offset = sna->kgem.surface - sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t); + offset *= sizeof(uint32_t); + + if (is_dst) { + if (bo->dst_bound) + return bo->dst_bound; + + bo->dst_bound = offset; + } else { + if (bo->src_bound) + return bo->src_bound; + + bo->src_bound = offset; + } + + sna->kgem.surface -= + sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t); + ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); + + ss->ss0.surface_type = GEN5_SURFACE_2D; + ss->ss0.surface_format = format; + + ss->ss0.data_return_format = GEN5_SURFACERETURNFORMAT_FLOAT32; + ss->ss0.color_blend = 1; + ss->ss1.base_addr = + kgem_add_reloc(&sna->kgem, + sna->kgem.surface + 1, + bo, domains, 0); + + ss->ss2.height = height - 1; + ss->ss2.width = width - 1; + ss->ss3.pitch = 
bo->pitch - 1; + ss->ss3.tile_walk = bo->tiling == I915_TILING_Y; + ss->ss3.tiled_surface = bo->tiling != I915_TILING_NONE; + + DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", + offset, bo->handle, ss->ss1.base_addr, + ss->ss0.surface_format, width, height, bo->pitch, bo->tiling, + domains & 0xffff ? "render" : "sampler")); + + return offset; +} + +fastcall static void +gen5_emit_composite_primitive_solid(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = 1.; + v[2] = 1.; + + dst.p.x = r->dst.x; + v[3] = dst.f; + v[4] = 0.; + v[5] = 1.; + + dst.p.y = r->dst.y; + v[6] = dst.f; + v[7] = 0.; + v[8] = 0.; +} + +fastcall static void +gen5_emit_composite_primitive_identity_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + const float *sf = op->src.scale; + float sx, sy, *v; + union { + struct sna_coordinate p; + float f; + } dst; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 9; + + sx = r->src.x + op->src.offset[0]; + sy = r->src.y + op->src.offset[1]; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = (sx + r->width) * sf[0]; + v[2] = (sy + r->height) * sf[1]; + + dst.p.x = r->dst.x; + v[3] = dst.f; + v[4] = sx * sf[0]; + v[5] = v[2]; + + dst.p.y = r->dst.y; + v[6] = dst.f; + v[7] = v[4]; + v[8] = sy * sf[1]; +} + +fastcall static void +gen5_emit_composite_primitive_affine_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + v = 
sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width, + op->src.offset[1] + r->src.y + r->height, + op->src.transform, + &v[1], &v[2]); + v[1] *= op->src.scale[0]; + v[2] *= op->src.scale[1]; + + dst.p.x = r->dst.x; + v[3] = dst.f; + _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, + op->src.offset[1] + r->src.y + r->height, + op->src.transform, + &v[4], &v[5]); + v[4] *= op->src.scale[0]; + v[5] *= op->src.scale[1]; + + dst.p.y = r->dst.y; + v[6] = dst.f; + _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, + op->src.offset[1] + r->src.y, + op->src.transform, + &v[7], &v[8]); + v[7] *= op->src.scale[0]; + v[8] *= op->src.scale[1]; +} + +fastcall static void +gen5_emit_composite_primitive_identity_source_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float src_x, src_y; + float msk_x, msk_y; + float w, h; + float *v; + + src_x = r->src.x + op->src.offset[0]; + src_y = r->src.y + op->src.offset[1]; + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 15; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = (src_x + w) * op->src.scale[0]; + v[2] = (src_y + h) * op->src.scale[1]; + v[3] = (msk_x + w) * op->mask.scale[0]; + v[4] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[5] = dst.f; + v[6] = src_x * op->src.scale[0]; + v[7] = v[2]; + v[8] = msk_x * op->mask.scale[0]; + v[9] = v[4]; + + dst.p.y = r->dst.y; + v[10] = dst.f; + v[11] = v[6]; + v[12] = src_y * op->src.scale[1]; + v[13] = v[8]; + v[14] = msk_y * op->mask.scale[1]; +} + 
+fastcall static void +gen5_emit_composite_primitive(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; + Bool is_affine = op->is_affine; + const float *src_sf = op->src.scale; + const float *mask_sf = op->mask.scale; + + if (is_affine) { + sna_get_transformed_coordinates(r->src.x + op->src.offset[0], + r->src.y + op->src.offset[1], + op->src.transform, + &src_x[0], + &src_y[0]); + + sna_get_transformed_coordinates(r->src.x + op->src.offset[0], + r->src.y + op->src.offset[1] + r->height, + op->src.transform, + &src_x[1], + &src_y[1]); + + sna_get_transformed_coordinates(r->src.x + op->src.offset[0] + r->width, + r->src.y + op->src.offset[1] + r->height, + op->src.transform, + &src_x[2], + &src_y[2]); + } else { + if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0], + r->src.y + op->src.offset[1], + op->src.transform, + &src_x[0], + &src_y[0], + &src_w[0])) + return; + + if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0], + r->src.y + op->src.offset[1] + r->height, + op->src.transform, + &src_x[1], + &src_y[1], + &src_w[1])) + return; + + if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0] + r->width, + r->src.y + op->src.offset[1] + r->height, + op->src.transform, + &src_x[2], + &src_y[2], + &src_w[2])) + return; + } + + if (op->mask.bo) { + if (is_affine) { + sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0], + r->mask.y + op->mask.offset[1], + op->mask.transform, + &mask_x[0], + &mask_y[0]); + + sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0], + r->mask.y + op->mask.offset[1] + r->height, + op->mask.transform, + &mask_x[1], + &mask_y[1]); + + sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0] + r->width, + r->mask.y + op->mask.offset[1] + r->height, + op->mask.transform, + &mask_x[2], + &mask_y[2]); + } else { + if 
(!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0], + r->mask.y + op->mask.offset[1], + op->mask.transform, + &mask_x[0], + &mask_y[0], + &mask_w[0])) + return; + + if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0], + r->mask.y + op->mask.offset[1] + r->height, + op->mask.transform, + &mask_x[1], + &mask_y[1], + &mask_w[1])) + return; + + if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0] + r->width, + r->mask.y + op->mask.offset[1] + r->height, + op->mask.transform, + &mask_x[2], + &mask_y[2], + &mask_w[2])) + return; + } + } + + OUT_VERTEX(r->dst.x + r->width, r->dst.y + r->height); + OUT_VERTEX_F(src_x[2] * src_sf[0]); + OUT_VERTEX_F(src_y[2] * src_sf[1]); + if (!is_affine) + OUT_VERTEX_F(src_w[2]); + if (op->mask.bo) { + OUT_VERTEX_F(mask_x[2] * mask_sf[0]); + OUT_VERTEX_F(mask_y[2] * mask_sf[1]); + if (!is_affine) + OUT_VERTEX_F(mask_w[2]); + } + + OUT_VERTEX(r->dst.x, r->dst.y + r->height); + OUT_VERTEX_F(src_x[1] * src_sf[0]); + OUT_VERTEX_F(src_y[1] * src_sf[1]); + if (!is_affine) + OUT_VERTEX_F(src_w[1]); + if (op->mask.bo) { + OUT_VERTEX_F(mask_x[1] * mask_sf[0]); + OUT_VERTEX_F(mask_y[1] * mask_sf[1]); + if (!is_affine) + OUT_VERTEX_F(mask_w[1]); + } + + OUT_VERTEX(r->dst.x, r->dst.y); + OUT_VERTEX_F(src_x[0] * src_sf[0]); + OUT_VERTEX_F(src_y[0] * src_sf[1]); + if (!is_affine) + OUT_VERTEX_F(src_w[0]); + if (op->mask.bo) { + OUT_VERTEX_F(mask_x[0] * mask_sf[0]); + OUT_VERTEX_F(mask_y[0] * mask_sf[1]); + if (!is_affine) + OUT_VERTEX_F(mask_w[0]); + } +} + +static void gen5_emit_vertex_buffer(struct sna *sna, + const struct sna_composite_op *op) +{ + int id = op->u.gen5.ve_id; + + OUT_BATCH(GEN5_3DSTATE_VERTEX_BUFFERS | 3); + OUT_BATCH((id << VB0_BUFFER_INDEX_SHIFT) | VB0_VERTEXDATA | + (4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT)); + sna->render.vertex_reloc[id] = sna->kgem.nbatch; + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + sna->render_state.gen5.vb_id |= 1 << id; +} + +static void 
gen5_emit_primitive(struct sna *sna) +{ + if (sna->kgem.nbatch == sna->render_state.gen5.last_primitive) { + sna->render_state.gen5.vertex_offset = sna->kgem.nbatch - 5; + return; + } + + OUT_BATCH(GEN5_3DPRIMITIVE | + GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL | + (_3DPRIM_RECTLIST << GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT) | + (0 << 9) | + 4); + sna->render_state.gen5.vertex_offset = sna->kgem.nbatch; + OUT_BATCH(0); /* vertex count, to be filled in later */ + OUT_BATCH(sna->render.vertex_index); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + sna->render.vertex_start = sna->render.vertex_index; + + sna->render_state.gen5.last_primitive = sna->kgem.nbatch; +} + +static bool gen5_rectangle_begin(struct sna *sna, + const struct sna_composite_op *op) +{ + int id = op->u.gen5.ve_id; + int ndwords; + + ndwords = 0; + if ((sna->render_state.gen5.vb_id & (1 << id)) == 0) + ndwords += 5; + if (sna->render_state.gen5.vertex_offset == 0) + ndwords += op->need_magic_ca_pass ? 
20 : 6; + if (ndwords == 0) + return true; + + if (!kgem_check_batch(&sna->kgem, ndwords)) + return false; + + if ((sna->render_state.gen5.vb_id & (1 << id)) == 0) + gen5_emit_vertex_buffer(sna, op); + if (sna->render_state.gen5.vertex_offset == 0) + gen5_emit_primitive(sna); + + return true; +} + +static int gen5_get_rectangles__flush(struct sna *sna) +{ + if (!kgem_check_batch(&sna->kgem, 25)) + return 0; + if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - 1) + return 0; + if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - 1) + return 0; + + gen5_vertex_finish(sna, FALSE); + sna->render.vertex_index = 0; + + return ARRAY_SIZE(sna->render.vertex_data); +} + +inline static int gen5_get_rectangles(struct sna *sna, + const struct sna_composite_op *op, + int want) +{ + int rem = vertex_space(sna); + + if (rem < 3*op->floats_per_vertex) { + DBG(("flushing vbo for %s: %d < %d\n", + __FUNCTION__, rem, 3*op->floats_per_vertex)); + rem = gen5_get_rectangles__flush(sna); + if (rem == 0) + return 0; + } + + if (!gen5_rectangle_begin(sna, op)) + return 0; + + if (want * op->floats_per_vertex*3 > rem) + want = rem / (3*op->floats_per_vertex); + + sna->render.vertex_index += 3*want; + return want; +} + +static uint32_t * +gen5_composite_get_binding_table(struct sna *sna, + const struct sna_composite_op *op, + uint16_t *offset) +{ + uint32_t *table; + + sna->kgem.surface -= + sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t); + /* Clear all surplus entries to zero in case of prefetch */ + table = memset(sna->kgem.batch + sna->kgem.surface, + 0, sizeof(struct gen5_surface_state_padded)); + *offset = sna->kgem.surface; + + DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); + + return table; +} + +static void +gen5_emit_sip(struct sna *sna) +{ + /* Set system instruction pointer */ + OUT_BATCH(GEN5_STATE_SIP | 0); + OUT_BATCH(0); +} + +static void +gen5_emit_urb(struct sna *sna) +{ + int urb_vs_start, urb_vs_size; + int urb_gs_start, urb_gs_size; + int 
urb_clip_start, urb_clip_size; + int urb_sf_start, urb_sf_size; + int urb_cs_start, urb_cs_size; + + urb_vs_start = 0; + urb_vs_size = URB_VS_ENTRIES * URB_VS_ENTRY_SIZE; + urb_gs_start = urb_vs_start + urb_vs_size; + urb_gs_size = URB_GS_ENTRIES * URB_GS_ENTRY_SIZE; + urb_clip_start = urb_gs_start + urb_gs_size; + urb_clip_size = URB_CLIP_ENTRIES * URB_CLIP_ENTRY_SIZE; + urb_sf_start = urb_clip_start + urb_clip_size; + urb_sf_size = URB_SF_ENTRIES * URB_SF_ENTRY_SIZE; + urb_cs_start = urb_sf_start + urb_sf_size; + urb_cs_size = URB_CS_ENTRIES * URB_CS_ENTRY_SIZE; + + OUT_BATCH(GEN5_URB_FENCE | + UF0_CS_REALLOC | + UF0_SF_REALLOC | + UF0_CLIP_REALLOC | + UF0_GS_REALLOC | + UF0_VS_REALLOC | + 1); + OUT_BATCH(((urb_clip_start + urb_clip_size) << UF1_CLIP_FENCE_SHIFT) | + ((urb_gs_start + urb_gs_size) << UF1_GS_FENCE_SHIFT) | + ((urb_vs_start + urb_vs_size) << UF1_VS_FENCE_SHIFT)); + OUT_BATCH(((urb_cs_start + urb_cs_size) << UF2_CS_FENCE_SHIFT) | + ((urb_sf_start + urb_sf_size) << UF2_SF_FENCE_SHIFT)); + + /* Constant buffer state */ + OUT_BATCH(GEN5_CS_URB_STATE | 0); + OUT_BATCH((URB_CS_ENTRY_SIZE - 1) << 4 | URB_CS_ENTRIES << 0); +} + +static void +gen5_emit_state_base_address(struct sna *sna) +{ + assert(sna->render_state.gen5.general_bo->proxy == NULL); + OUT_BATCH(GEN5_STATE_BASE_ADDRESS | 6); + OUT_BATCH(kgem_add_reloc(&sna->kgem, /* general */ + sna->kgem.nbatch, + sna->render_state.gen5.general_bo, + I915_GEM_DOMAIN_INSTRUCTION << 16, + BASE_ADDRESS_MODIFY)); + OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */ + sna->kgem.nbatch, + NULL, + I915_GEM_DOMAIN_INSTRUCTION << 16, + BASE_ADDRESS_MODIFY)); + OUT_BATCH(0); /* media */ + OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */ + sna->kgem.nbatch, + sna->render_state.gen5.general_bo, + I915_GEM_DOMAIN_INSTRUCTION << 16, + BASE_ADDRESS_MODIFY)); + + /* upper bounds, all disabled */ + OUT_BATCH(BASE_ADDRESS_MODIFY); + OUT_BATCH(0); + OUT_BATCH(BASE_ADDRESS_MODIFY); +} + +static void 
+gen5_emit_invariant(struct sna *sna) +{ + /* Ironlake errata workaround: Before disabling the clipper, + * you have to MI_FLUSH to get the pipeline idle. + */ + OUT_BATCH(MI_FLUSH | MI_INHIBIT_RENDER_CACHE_FLUSH); + OUT_BATCH(GEN5_PIPELINE_SELECT | PIPELINE_SELECT_3D); + + gen5_emit_sip(sna); + gen5_emit_state_base_address(sna); + + sna->render_state.gen5.needs_invariant = FALSE; +} + +static void +gen5_get_batch(struct sna *sna) +{ + kgem_set_mode(&sna->kgem, KGEM_RENDER); + + if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { + DBG(("%s: flushing batch: %d < %d+%d\n", + __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, + 150, 4*8)); + kgem_submit(&sna->kgem); + } + + if (sna->render_state.gen5.needs_invariant) + gen5_emit_invariant(sna); +} + +static void +gen5_align_vertex(struct sna *sna, const struct sna_composite_op *op) +{ + if (op->floats_per_vertex != sna->render_state.gen5.floats_per_vertex) { + DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", + sna->render_state.gen5.floats_per_vertex, + op->floats_per_vertex, + sna->render.vertex_index, + (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex)); + sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; + sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; + sna->render_state.gen5.floats_per_vertex = op->floats_per_vertex; + } +} + +static void +gen5_emit_binding_table(struct sna *sna, uint16_t offset) +{ + if (sna->render_state.gen5.surface_table == offset) + return; + + sna->render_state.gen5.surface_table = offset; + + /* Binding table pointers */ + OUT_BATCH(GEN5_3DSTATE_BINDING_TABLE_POINTERS | 4); + OUT_BATCH(0); /* vs */ + OUT_BATCH(0); /* gs */ + OUT_BATCH(0); /* clip */ + OUT_BATCH(0); /* sf */ + /* Only the PS uses the binding table */ + OUT_BATCH(offset*4); +} + +static bool +gen5_emit_pipelined_pointers(struct sna *sna, + const struct sna_composite_op *op, + int 
blend, int kernel) +{ + uint16_t offset = sna->kgem.nbatch, last; + + OUT_BATCH(GEN5_3DSTATE_PIPELINED_POINTERS | 5); + OUT_BATCH(sna->render_state.gen5.vs); + OUT_BATCH(GEN5_GS_DISABLE); /* passthrough */ + OUT_BATCH(GEN5_CLIP_DISABLE); /* passthrough */ + OUT_BATCH(sna->render_state.gen5.sf[op->mask.bo != NULL]); + OUT_BATCH(sna->render_state.gen5.wm + + SAMPLER_OFFSET(op->src.filter, op->src.repeat, + op->mask.filter, op->mask.repeat, + kernel)); + OUT_BATCH(sna->render_state.gen5.cc + + gen5_get_blend(blend, op->has_component_alpha, op->dst.format)); + + last = sna->render_state.gen5.last_pipelined_pointers; + if (last && + sna->kgem.batch[offset + 1] == sna->kgem.batch[last + 1] && + sna->kgem.batch[offset + 3] == sna->kgem.batch[last + 3] && + sna->kgem.batch[offset + 4] == sna->kgem.batch[last + 4] && + sna->kgem.batch[offset + 5] == sna->kgem.batch[last + 5] && + sna->kgem.batch[offset + 6] == sna->kgem.batch[last + 6]) { + sna->kgem.nbatch = offset; + return false; + } else { + sna->render_state.gen5.last_pipelined_pointers = offset; + return true; + } +} + +static void +gen5_emit_drawing_rectangle(struct sna *sna, const struct sna_composite_op *op) +{ + uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); + uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; + + if (sna->render_state.gen5.drawrect_limit == limit && + sna->render_state.gen5.drawrect_offset == offset) + return; + sna->render_state.gen5.drawrect_offset = offset; + sna->render_state.gen5.drawrect_limit = limit; + + OUT_BATCH(GEN5_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); + OUT_BATCH(0x00000000); + OUT_BATCH(limit); + OUT_BATCH(offset); +} + +static void +gen5_emit_vertex_elements(struct sna *sna, + const struct sna_composite_op *op) +{ + /* + * vertex data in vertex buffer + * position: (x, y) + * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0) + * texture coordinate 1 if (has_mask is TRUE): same as above + */ + struct gen5_render_state *render 
= &sna->render_state.gen5; + Bool has_mask = op->mask.bo != NULL; + Bool is_affine = op->is_affine; + int nelem = has_mask ? 2 : 1; + int selem = is_affine ? 2 : 3; + uint32_t w_component; + uint32_t src_format; + int id = op->u.gen5.ve_id;; + + if (render->ve_id == id) + return; + + render->ve_id = id; + + if (is_affine) { + src_format = GEN5_SURFACEFORMAT_R32G32_FLOAT; + w_component = GEN5_VFCOMPONENT_STORE_1_FLT; + } else { + src_format = GEN5_SURFACEFORMAT_R32G32B32_FLOAT; + w_component = GEN5_VFCOMPONENT_STORE_SRC; + } + + /* The VUE layout + * dword 0-3: pad (0.0, 0.0, 0.0. 0.0) + * dword 4-7: position (x, y, 1.0, 1.0), + * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) + * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) + * + * dword 4-15 are fetched from vertex buffer + */ + OUT_BATCH(GEN5_3DSTATE_VERTEX_ELEMENTS | + ((2 * (2 + nelem)) + 1 - 2)); + + OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | + (GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); + OUT_BATCH((GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT) | + (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT) | + (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT) | + (GEN5_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT)); + + /* x,y */ + OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | + (GEN5_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT) | + (0 << VE0_OFFSET_SHIFT)); /* offsets vb in bytes */ + OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT) | + (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + + /* u0, v0, w0 */ + OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | + (src_format << VE0_FORMAT_SHIFT) | + (4 << VE0_OFFSET_SHIFT)); /* offset vb in bytes */ + OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (GEN5_VFCOMPONENT_STORE_SRC << 
VE1_VFCOMPONENT_1_SHIFT) | + (w_component << VE1_VFCOMPONENT_2_SHIFT) | + (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + + /* u1, v1, w1 */ + if (has_mask) { + OUT_BATCH((id << VE0_VERTEX_BUFFER_INDEX_SHIFT) | VE0_VALID | + (src_format << VE0_FORMAT_SHIFT) | + (((1 + selem) * 4) << VE0_OFFSET_SHIFT)); /* vb offset in bytes */ + OUT_BATCH((GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT) | + (GEN5_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT) | + (w_component << VE1_VFCOMPONENT_2_SHIFT) | + (GEN5_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT)); + } +} + +static void +gen5_emit_state(struct sna *sna, + const struct sna_composite_op *op, + uint16_t offset) +{ + gen5_emit_binding_table(sna, offset); + if (gen5_emit_pipelined_pointers(sna, op, op->op, op->u.gen5.wm_kernel)) + gen5_emit_urb(sna); + gen5_emit_vertex_elements(sna, op); + gen5_emit_drawing_rectangle(sna, op); +} + +static void gen5_bind_surfaces(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + + gen5_get_batch(sna); + + binding_table = gen5_composite_get_binding_table(sna, op, &offset); + + binding_table[0] = + gen5_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen5_get_dest_format(op->dst.format), + TRUE); + binding_table[1] = + gen5_bind_bo(sna, + op->src.bo, op->src.width, op->src.height, + op->src.card_format, + FALSE); + if (op->mask.bo) + binding_table[2] = + gen5_bind_bo(sna, + op->mask.bo, + op->mask.width, + op->mask.height, + op->mask.card_format, + FALSE); + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table && + (op->mask.bo == NULL || + sna->kgem.batch[sna->render_state.gen5.surface_table+2] == binding_table[2])) { + sna->kgem.surface += sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t); + offset = sna->render_state.gen5.surface_table; + } + + gen5_emit_state(sna, op, offset); +} + + +fastcall static 
void +gen5_render_composite_blt(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + DBG(("%s: src=(%d, %d)+(%d, %d), mask=(%d, %d)+(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", + __FUNCTION__, + r->src.x, r->src.y, op->src.offset[0], op->src.offset[1], + r->mask.x, r->mask.y, op->mask.offset[0], op->mask.offset[1], + r->dst.x, r->dst.y, op->dst.x, op->dst.y, + r->width, r->height)); + + if (!gen5_get_rectangles(sna, op, 1)) { + gen5_bind_surfaces(sna, op); + gen5_get_rectangles(sna, op, 1); + } + + op->prim_emit(sna, op, r); +} + +static void +gen5_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s(%d) delta=(%d, %d), src=(%d, %d)/(%d, %d), mask=(%d, %d)/(%d, %d)\n", + __FUNCTION__, nbox, op->dst.x, op->dst.y, + op->src.offset[0], op->src.offset[1], + op->src.width, op->src.height, + op->mask.offset[0], op->mask.offset[1], + op->mask.width, op->mask.height)); + + do { + int nbox_this_time = gen5_get_rectangles(sna, op, nbox); + if (nbox_this_time == 0) { + gen5_bind_surfaces(sna, op); + nbox_this_time = gen5_get_rectangles(sna, op, nbox); + } + nbox -= nbox_this_time; + + do { + struct sna_composite_rectangles r; + + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.mask = r.src = r.dst; + op->prim_emit(sna, op, &r); + box++; + } while (--nbox_this_time); + } while (nbox); +} + +#ifndef MAX +#define MAX(a,b) ((a) > (b) ? 
(a) : (b)) +#endif + +static uint32_t gen5_bind_video_source(struct sna *sna, + struct kgem_bo *src_bo, + uint32_t src_offset, + int src_width, + int src_height, + int src_pitch, + uint32_t src_surf_format) +{ + struct gen5_surface_state *ss; + + sna->kgem.surface -= sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t); + + ss = memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); + ss->ss0.surface_type = GEN5_SURFACE_2D; + ss->ss0.surface_format = src_surf_format; + ss->ss0.color_blend = 1; + + ss->ss1.base_addr = + kgem_add_reloc(&sna->kgem, + sna->kgem.surface + 1, + src_bo, + I915_GEM_DOMAIN_SAMPLER << 16, + src_offset); + + ss->ss2.width = src_width - 1; + ss->ss2.height = src_height - 1; + ss->ss3.pitch = src_pitch - 1; + + return sna->kgem.surface * sizeof(uint32_t); +} + +static void gen5_video_bind_surfaces(struct sna *sna, + struct sna_composite_op *op, + struct sna_video_frame *frame) +{ + uint32_t src_surf_format; + uint32_t src_surf_base[6]; + int src_width[6]; + int src_height[6]; + int src_pitch[6]; + uint32_t *binding_table; + int n_src, n; + uint16_t offset; + + + src_surf_base[0] = frame->YBufOffset; + src_surf_base[1] = frame->YBufOffset; + src_surf_base[2] = frame->VBufOffset; + src_surf_base[3] = frame->VBufOffset; + src_surf_base[4] = frame->UBufOffset; + src_surf_base[5] = frame->UBufOffset; + + if (is_planar_fourcc(frame->id)) { + src_surf_format = GEN5_SURFACEFORMAT_R8_UNORM; + src_width[1] = src_width[0] = frame->width; + src_height[1] = src_height[0] = frame->height; + src_pitch[1] = src_pitch[0] = frame->pitch[1]; + src_width[4] = src_width[5] = src_width[2] = src_width[3] = + frame->width / 2; + src_height[4] = src_height[5] = src_height[2] = src_height[3] = + frame->height / 2; + src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = + frame->pitch[0]; + n_src = 6; + } else { + if (frame->id == FOURCC_UYVY) + src_surf_format = GEN5_SURFACEFORMAT_YCRCB_SWAPY; + else + src_surf_format = 
GEN5_SURFACEFORMAT_YCRCB_NORMAL; + + src_width[0] = frame->width; + src_height[0] = frame->height; + src_pitch[0] = frame->pitch[0]; + n_src = 1; + } + + gen5_get_batch(sna); + binding_table = gen5_composite_get_binding_table(sna, op, &offset); + + binding_table[0] = + gen5_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen5_get_dest_format_for_depth(op->dst.format), + TRUE); + for (n = 0; n < n_src; n++) { + binding_table[1+n] = + gen5_bind_video_source(sna, + frame->bo, + src_surf_base[n], + src_width[n], + src_height[n], + src_pitch[n], + src_surf_format); + } + + gen5_emit_state(sna, op, offset); +} + +static Bool +gen5_render_video(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + short src_w, short src_h, + short drw_w, short drw_h, + PixmapPtr pixmap) +{ + struct sna_composite_op tmp; + int nbox, dxo, dyo, pix_xoff, pix_yoff; + float src_scale_x, src_scale_y; + struct sna_pixmap *priv; + BoxPtr box; + + DBG(("%s: %dx%d -> %dx%d\n", __FUNCTION__, src_w, src_h, drw_w, drw_h)); + + priv = sna_pixmap_force_to_gpu(pixmap); + if (priv == NULL) + return FALSE; + + memset(&tmp, 0, sizeof(tmp)); + + tmp.op = PictOpSrc; + tmp.dst.pixmap = pixmap; + tmp.dst.width = pixmap->drawable.width; + tmp.dst.height = pixmap->drawable.height; + tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth); + tmp.dst.bo = priv->gpu_bo; + + tmp.src.filter = SAMPLER_FILTER_BILINEAR; + tmp.src.repeat = SAMPLER_EXTEND_NONE; + tmp.u.gen5.wm_kernel = + is_planar_fourcc(frame->id) ? 
WM_KERNEL_VIDEO_PLANAR : WM_KERNEL_VIDEO_PACKED; + tmp.u.gen5.ve_id = 1; + tmp.is_affine = TRUE; + tmp.floats_per_vertex = 3; + + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, frame->bo)) + kgem_submit(&sna->kgem); + + if (!kgem_bo_is_dirty(frame->bo)) + kgem_emit_flush(&sna->kgem); + + gen5_video_bind_surfaces(sna, &tmp, frame); + gen5_align_vertex(sna, &tmp); + + /* Set up the offset for translating from the given region (in screen + * coordinates) to the backing pixmap. + */ +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + + dxo = dstRegion->extents.x1; + dyo = dstRegion->extents.y1; + + /* Use normalized texture coordinates */ + src_scale_x = ((float)src_w / frame->width) / (float)drw_w; + src_scale_y = ((float)src_h / frame->height) / (float)drw_h; + + box = REGION_RECTS(dstRegion); + nbox = REGION_NUM_RECTS(dstRegion); + while (nbox--) { + BoxRec r; + + r.x1 = box->x1 + pix_xoff; + r.x2 = box->x2 + pix_xoff; + r.y1 = box->y1 + pix_yoff; + r.y2 = box->y2 + pix_yoff; + + if (!gen5_get_rectangles(sna, &tmp, 1)) { + gen5_video_bind_surfaces(sna, &tmp, frame); + gen5_get_rectangles(sna, &tmp, 1); + } + + OUT_VERTEX(r.x2, r.y2); + OUT_VERTEX_F((box->x2 - dxo) * src_scale_x); + OUT_VERTEX_F((box->y2 - dyo) * src_scale_y); + + OUT_VERTEX(r.x1, r.y2); + OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); + OUT_VERTEX_F((box->y2 - dyo) * src_scale_y); + + OUT_VERTEX(r.x1, r.y1); + OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); + OUT_VERTEX_F((box->y1 - dyo) * src_scale_y); + + sna_damage_add_box(&priv->gpu_damage, &r); + sna_damage_subtract_box(&priv->cpu_damage, &r); + box++; + } + + return TRUE; +} + +static int +gen5_composite_solid_init(struct sna *sna, + struct sna_composite_channel *channel, + uint32_t color) +{ + channel->filter = PictFilterNearest; + channel->repeat = RepeatNormal; + 
channel->is_affine = TRUE; + channel->is_solid = TRUE; + channel->transform = NULL; + channel->width = 1; + channel->height = 1; + channel->card_format = GEN5_SURFACEFORMAT_B8G8R8A8_UNORM; + + channel->bo = sna_render_get_solid(sna, color); + + channel->scale[0] = channel->scale[1] = 1; + channel->offset[0] = channel->offset[1] = 0; + return channel->bo != NULL; +} + +static int +gen5_composite_picture(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int x, int y, + int w, int h, + int dst_x, int dst_y) +{ + PixmapPtr pixmap; + uint32_t color; + int16_t dx, dy; + + DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", + __FUNCTION__, x, y, w, h, dst_x, dst_y)); + + channel->is_solid = FALSE; + channel->card_format = -1; + + if (sna_picture_is_solid(picture, &color)) + return gen5_composite_solid_init(sna, channel, color); + + if (picture->pDrawable == NULL) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + if (!gen5_check_repeat(picture)) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + if (!gen5_check_filter(picture)) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + channel->repeat = picture->repeat ? 
picture->repeatType : RepeatNone; + channel->filter = picture->filter; + + pixmap = get_drawable_pixmap(picture->pDrawable); + get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); + + x += dx + picture->pDrawable->x; + y += dy + picture->pDrawable->y; + + channel->is_affine = sna_transform_is_affine(picture->transform); + if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) { + DBG(("%s: integer translation (%d, %d), removing\n", + __FUNCTION__, dx, dy)); + x += dx; + y += dy; + channel->transform = NULL; + channel->filter = PictFilterNearest; + } else + channel->transform = picture->transform; + + channel->card_format = gen5_get_card_format(picture->format); + if (channel->card_format == -1) + return sna_render_picture_convert(sna, picture, channel, pixmap, + x, y, w, h, dst_x, dst_y); + + if (pixmap->drawable.width > 8192 || pixmap->drawable.height > 8192) + return sna_render_picture_extract(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + return sna_render_pixmap_bo(sna, channel, pixmap, + x, y, w, h, dst_x, dst_y); +} + +static void gen5_composite_channel_convert(struct sna_composite_channel *channel) +{ + channel->repeat = gen5_repeat(channel->repeat); + channel->filter = gen5_filter(channel->filter); + if (channel->card_format == -1) + channel->card_format = gen5_get_card_format(channel->pict_format); +} + +static void +gen5_render_composite_done(struct sna *sna, + const struct sna_composite_op *op) +{ + gen5_vertex_flush(sna); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + sna->render.op = NULL; + + DBG(("%s()\n", __FUNCTION__)); + + sna_render_composite_redirect_done(sna, op); + + if (op->src.bo) + kgem_bo_destroy(&sna->kgem, op->src.bo); + if (op->mask.bo) + kgem_bo_destroy(&sna->kgem, op->mask.bo); +} + +static Bool +gen5_composite_set_target(struct sna *sna, + PicturePtr dst, + struct sna_composite_op *op) +{ + struct sna_pixmap *priv; + + DBG(("%s: dst=%p\n", __FUNCTION__, dst)); + + if (!gen5_check_dst_format(dst->format)) 
{ + DBG(("%s: incompatible dst format %08x\n", + __FUNCTION__, dst->format)); + return FALSE; + } + + op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); + op->dst.width = op->dst.pixmap->drawable.width; + op->dst.height = op->dst.pixmap->drawable.height; + op->dst.format = dst->format; + priv = sna_pixmap_force_to_gpu(op->dst.pixmap); + if (priv == NULL) + return FALSE; + + DBG(("%s: pixmap=%p, format=%08x\n", __FUNCTION__, + op->dst.pixmap, (unsigned int)op->dst.format)); + + + op->dst.bo = priv->gpu_bo; + if (!priv->gpu_only) + op->damage = &priv->gpu_damage; + + DBG(("%s: bo=%p, damage=%p\n", __FUNCTION__, op->dst.bo, op->damage)); + + get_drawable_deltas(dst->pDrawable, op->dst.pixmap, + &op->dst.x, &op->dst.y); + return TRUE; +} + +static inline Bool +picture_is_cpu(PicturePtr picture) +{ + if (!picture->pDrawable) + return FALSE; + + /* If it is a solid, try to use the render paths */ + if (picture->pDrawable->width == 1 && + picture->pDrawable->height == 1 && + picture->repeat) + return FALSE; + + return is_cpu(picture->pDrawable); +} + +static Bool +try_blt(struct sna *sna, + PicturePtr dst, + PicturePtr source, + int width, int height) +{ + if (sna->kgem.mode == KGEM_BLT) { + DBG(("%s: already performing BLT\n", __FUNCTION__)); + return TRUE; + } + + if (width > 8192 || height > 8192) { + DBG(("%s: operation too large for 3D pipe (%d, %d)\n", + __FUNCTION__, width, height)); + return TRUE; + } + + /* is the source picture only in cpu memory e.g. a shm pixmap? 
 */
	return picture_is_cpu(source);
}

/* Top-level composite entry point: try the BLT engine first for simple
 * unmasked cases, otherwise validate the blend op, set up the destination,
 * source and optional mask channels, and select the appropriate vertex
 * emitter and WM kernel for the render pipeline.
 */
static Bool
gen5_render_composite(struct sna *sna,
		      uint8_t op,
		      PicturePtr src,
		      PicturePtr mask,
		      PicturePtr dst,
		      int16_t src_x, int16_t src_y,
		      int16_t msk_x, int16_t msk_y,
		      int16_t dst_x, int16_t dst_y,
		      int16_t width, int16_t height,
		      struct sna_composite_op *tmp)
{
	DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__,
	     width, height, sna->kgem.mode));

	if (mask == NULL &&
	    try_blt(sna, dst, src, width, height) &&
	    sna_blt_composite(sna, op,
			      src, dst,
			      src_x, src_y,
			      dst_x, dst_y,
			      width, height, tmp))
		return TRUE;

	if (op >= ARRAY_SIZE(gen5_blend_op)) {
		DBG(("%s: unhandled blend op %d\n", __FUNCTION__, op));
		return FALSE;
	}

	if (need_tiling(sna, width, height))
		return sna_tiling_composite(sna,
					    op, src, mask, dst,
					    src_x, src_y,
					    msk_x, msk_y,
					    dst_x, dst_y,
					    width, height,
					    tmp);

	if (!gen5_composite_set_target(sna, dst, tmp)) {
		DBG(("%s: failed to set composite target\n", __FUNCTION__));
		return FALSE;
	}

	if (tmp->dst.width > 8192 || tmp->dst.height > 8192) {
		if (!sna_render_composite_redirect(sna, tmp,
						   dst_x, dst_y, width, height))
			return FALSE;
	}

	DBG(("%s: preparing source\n", __FUNCTION__));
	switch (gen5_composite_picture(sna, src, &tmp->src,
				       src_x, src_y,
				       width, height,
				       dst_x, dst_y)) {
	case -1:
		DBG(("%s: failed to prepare source picture\n", __FUNCTION__));
		goto cleanup_dst;
	case 0:
		gen5_composite_solid_init(sna, &tmp->src, 0);
		/* fall through */
	case 1:
		gen5_composite_channel_convert(&tmp->src);
		break;
	}

	tmp->op = op;
	tmp->is_affine = tmp->src.is_affine;
	tmp->has_component_alpha = FALSE;
	tmp->need_magic_ca_pass = FALSE;

	tmp->prim_emit = gen5_emit_composite_primitive;
	if (mask) {
		if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) {
			tmp->has_component_alpha = TRUE;

			/* Check if it's component alpha that relies on a source alpha and on
			 * the source value. We can only get one of those into the single
			 * source value that we get to blend with.
			 */
			if (gen5_blend_op[op].src_alpha &&
			    (gen5_blend_op[op].src_blend != GEN5_BLENDFACTOR_ZERO)) {
				if (op != PictOpOver) {
					DBG(("%s: unhandled CA blend op %d\n", __FUNCTION__, op));
					goto cleanup_src;
				}

				tmp->need_magic_ca_pass = TRUE;
				tmp->op = PictOpOutReverse;
			}
		}

		DBG(("%s: preparing mask\n", __FUNCTION__));
		switch (gen5_composite_picture(sna, mask, &tmp->mask,
					       msk_x, msk_y,
					       width, height,
					       dst_x, dst_y)) {
		case -1:
			DBG(("%s: failed to prepare mask picture\n", __FUNCTION__));
			goto cleanup_src;
		case 0:
			gen5_composite_solid_init(sna, &tmp->mask, 0);
			/* fall through */
		case 1:
			gen5_composite_channel_convert(&tmp->mask);
			break;
		}

		tmp->is_affine &= tmp->mask.is_affine;

		if (tmp->src.transform == NULL && tmp->mask.transform == NULL)
			tmp->prim_emit = gen5_emit_composite_primitive_identity_source_mask;

		tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine;
	} else {
		/* Pick the cheapest specialised emitter for the source. */
		if (tmp->src.is_solid)
			tmp->prim_emit = gen5_emit_composite_primitive_solid;
		else if (tmp->src.transform == NULL)
			tmp->prim_emit = gen5_emit_composite_primitive_identity_source;
		else if (tmp->src.is_affine)
			tmp->prim_emit = gen5_emit_composite_primitive_affine_source;

		tmp->floats_per_vertex = 3 + !tmp->is_affine;
	}

	tmp->u.gen5.wm_kernel =
		gen5_choose_composite_kernel(tmp->op,
					     tmp->mask.bo != NULL,
					     tmp->has_component_alpha,
					     tmp->is_affine);
	tmp->u.gen5.ve_id = (tmp->mask.bo != NULL) << 1 | tmp->is_affine;

	tmp->blt = gen5_render_composite_blt;
	tmp->boxes = gen5_render_composite_boxes;
	tmp->done = gen5_render_composite_done;

	if (!kgem_check_bo(&sna->kgem, tmp->dst.bo))
		kgem_submit(&sna->kgem);
	if (!kgem_check_bo(&sna->kgem, tmp->src.bo))
		kgem_submit(&sna->kgem);
	if (!kgem_check_bo(&sna->kgem, tmp->mask.bo))
		kgem_submit(&sna->kgem);

	/* Flush render caches if a sampled bo has pending GPU writes. */
	if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo))
		kgem_emit_flush(&sna->kgem);
+ + gen5_bind_surfaces(sna, tmp); + gen5_align_vertex(sna, tmp); + + sna->render.op = tmp; + return TRUE; + +cleanup_src: + if (tmp->src.bo) + kgem_bo_destroy(&sna->kgem, tmp->src.bo); +cleanup_dst: + if (tmp->redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->dst.bo); + return FALSE; +} + +static void +gen5_copy_bind_surfaces(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + + gen5_get_batch(sna); + + binding_table = gen5_composite_get_binding_table(sna, op, &offset); + + binding_table[0] = + gen5_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen5_get_dest_format_for_depth(op->dst.pixmap->drawable.depth), + TRUE); + binding_table[1] = + gen5_bind_bo(sna, + op->src.bo, op->src.width, op->src.height, + op->src.card_format, + FALSE); + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table) { + sna->kgem.surface += sizeof(struct gen5_surface_state_padded) / sizeof(uint32_t); + offset = sna->render_state.gen5.surface_table; + } + + gen5_emit_state(sna, op,offset); +} + +static Bool +gen5_render_copy_boxes(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n) +{ + struct sna_composite_op tmp; + + if (sna->kgem.mode == KGEM_BLT && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n)) + return TRUE; + + if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || + src->drawable.width > 8192 || src->drawable.height > 8192 || + dst->drawable.width > 8192 || dst->drawable.height > 8192) + return sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n); + + DBG(("%s (%d, %d)->(%d, %d) x %d\n", + __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, 
n)); + + memset(&tmp, 0, sizeof(tmp)); + + tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear; + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp.dst.bo = dst_bo; + + tmp.src.bo = src_bo; + tmp.src.filter = SAMPLER_FILTER_NEAREST; + tmp.src.repeat = SAMPLER_EXTEND_NONE; + tmp.src.card_format = + gen5_get_card_format_for_depth(src->drawable.depth), + tmp.src.width = src->drawable.width; + tmp.src.height = src->drawable.height; + + tmp.is_affine = TRUE; + tmp.floats_per_vertex = 3; + tmp.u.gen5.wm_kernel = WM_KERNEL; + tmp.u.gen5.ve_id = 1; + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, src_bo)) + kgem_submit(&sna->kgem); + + if (kgem_bo_is_dirty(src_bo)) + kgem_emit_flush(&sna->kgem); + + gen5_get_batch(sna); + gen5_copy_bind_surfaces(sna, &tmp); + gen5_align_vertex(sna, &tmp); + + tmp.src.scale[0] = 1. / src->drawable.width; + tmp.src.scale[1] = 1. 
/ src->drawable.height; + do { + int n_this_time = gen5_get_rectangles(sna, &tmp, n); + if (n_this_time == 0) { + gen5_copy_bind_surfaces(sna, &tmp); + n_this_time = gen5_get_rectangles(sna, &tmp, n); + } + n -= n_this_time; + + do { + DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + box->x2 - box->x1, box->y2 - box->y1)); + OUT_VERTEX(box->x2 + dst_dx, box->y2 + dst_dy); + OUT_VERTEX_F((box->x2 + src_dx) * tmp.src.scale[0]); + OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]); + + OUT_VERTEX(box->x1 + dst_dx, box->y2 + dst_dy); + OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]); + OUT_VERTEX_F((box->y2 + src_dy) * tmp.src.scale[1]); + + OUT_VERTEX(box->x1 + dst_dx, box->y1 + dst_dy); + OUT_VERTEX_F((box->x1 + src_dx) * tmp.src.scale[0]); + OUT_VERTEX_F((box->y1 + src_dy) * tmp.src.scale[1]); + + box++; + } while (--n_this_time); + } while (n); + + gen5_vertex_flush(sna); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + return TRUE; +} + +static void +gen5_render_copy_blt(struct sna *sna, + const struct sna_copy_op *op, + int16_t sx, int16_t sy, + int16_t w, int16_t h, + int16_t dx, int16_t dy) +{ + DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", __FUNCTION__, + sx, sy, dx, dy, w, h)); + + if (!gen5_get_rectangles(sna, &op->base, 1)) { + gen5_copy_bind_surfaces(sna, &op->base); + gen5_get_rectangles(sna, &op->base, 1); + } + + OUT_VERTEX(dx+w, dy+h); + OUT_VERTEX_F((sx+w)*op->base.src.scale[0]); + OUT_VERTEX_F((sy+h)*op->base.src.scale[1]); + + OUT_VERTEX(dx, dy+h); + OUT_VERTEX_F(sx*op->base.src.scale[0]); + OUT_VERTEX_F((sy+h)*op->base.src.scale[1]); + + OUT_VERTEX(dx, dy); + OUT_VERTEX_F(sx*op->base.src.scale[0]); + OUT_VERTEX_F(sy*op->base.src.scale[1]); +} + +static void +gen5_render_copy_done(struct sna *sna, + const struct sna_copy_op *op) +{ + gen5_vertex_flush(sna); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + + DBG(("%s()\n", __FUNCTION__)); +} + +static Bool 
+gen5_render_copy(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + struct sna_copy_op *op) +{ + DBG(("%s (alu=%d)\n", __FUNCTION__, alu)); + + if (sna->kgem.mode == KGEM_BLT && + sna_blt_copy(sna, alu, + src_bo, dst_bo, + dst->drawable.bitsPerPixel, + op)) + return TRUE; + + if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || + src->drawable.width > 8192 || src->drawable.height > 8192 || + dst->drawable.width > 8192 || dst->drawable.height > 8192) + return sna_blt_copy(sna, alu, src_bo, dst_bo, + dst->drawable.bitsPerPixel, + op); + + op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear; + + op->base.dst.pixmap = dst; + op->base.dst.width = dst->drawable.width; + op->base.dst.height = dst->drawable.height; + op->base.dst.format = sna_format_for_depth(dst->drawable.depth); + op->base.dst.bo = dst_bo; + + op->base.src.bo = src_bo; + op->base.src.card_format = + gen5_get_card_format_for_depth(src->drawable.depth), + op->base.src.width = src->drawable.width; + op->base.src.height = src->drawable.height; + op->base.src.scale[0] = 1./src->drawable.width; + op->base.src.scale[1] = 1./src->drawable.height; + op->base.src.filter = SAMPLER_FILTER_NEAREST; + op->base.src.repeat = SAMPLER_EXTEND_NONE; + + op->base.is_affine = true; + op->base.floats_per_vertex = 3; + op->base.u.gen5.wm_kernel = WM_KERNEL; + op->base.u.gen5.ve_id = 1; + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, src_bo)) + kgem_submit(&sna->kgem); + + if (kgem_bo_is_dirty(src_bo)) + kgem_emit_flush(&sna->kgem); + + gen5_copy_bind_surfaces(sna, &op->base); + gen5_align_vertex(sna, &op->base); + + op->blt = gen5_render_copy_blt; + op->done = gen5_render_copy_done; + return TRUE; +} + +static void +gen5_fill_bind_surfaces(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + + gen5_get_batch(sna); + + binding_table = 
gen5_composite_get_binding_table(sna, op, &offset); + + binding_table[0] = + gen5_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen5_get_dest_format(op->dst.format), + TRUE); + binding_table[1] = + gen5_bind_bo(sna, + op->src.bo, 1, 1, + GEN5_SURFACEFORMAT_B8G8R8A8_UNORM, + FALSE); + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen5.surface_table) == *(uint64_t*)binding_table) { + sna->kgem.surface += + sizeof(struct gen5_surface_state_padded)/sizeof(uint32_t); + offset = sna->render_state.gen5.surface_table; + } + + gen5_emit_state(sna, op, offset); +} + +static Bool +gen5_render_fill_boxes(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n) +{ + struct sna_composite_op tmp; + uint32_t pixel; + + DBG(("%s op=%x, color=%08x, boxes=%d x [((%d, %d), (%d, %d))...]\n", + __FUNCTION__, op, pixel, n, box->x1, box->y1, box->x2, box->y2)); + + if (op >= ARRAY_SIZE(gen5_blend_op)) { + DBG(("%s: fallback due to unhandled blend op: %d\n", + __FUNCTION__, op)); + return FALSE; + } + + if (sna->kgem.mode == KGEM_BLT || + dst->drawable.width > 8192 || + dst->drawable.height > 8192 || + !gen5_check_dst_format(format)) { + uint8_t alu = GXcopy; + + if (op == PictOpClear) { + alu = GXclear; + pixel = 0; + op = PictOpSrc; + } + + if (op == PictOpOver && color->alpha >= 0xff00) + op = PictOpSrc; + + if (op == PictOpSrc && + sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + format) && + sna_blt_fill_boxes(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + pixel, box, n)) + return TRUE; + + if (dst->drawable.width > 8192 || + dst->drawable.height > 8192 || + !gen5_check_dst_format(format)) + return FALSE; + } + + if (!sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + PICT_a8r8g8b8)) + return FALSE; + + memset(&tmp, 0, sizeof(tmp)); + + tmp.op = 
op; + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = format; + tmp.dst.bo = dst_bo; + + tmp.src.bo = sna_render_get_solid(sna, pixel); + tmp.src.filter = SAMPLER_FILTER_NEAREST; + tmp.src.repeat = SAMPLER_EXTEND_REPEAT; + + tmp.is_affine = TRUE; + tmp.floats_per_vertex = 3; + tmp.u.gen5.wm_kernel = WM_KERNEL; + tmp.u.gen5.ve_id = 1; + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + + gen5_fill_bind_surfaces(sna, &tmp); + gen5_align_vertex(sna, &tmp); + + do { + int n_this_time = gen5_get_rectangles(sna, &tmp, n); + if (n_this_time == 0) { + gen5_fill_bind_surfaces(sna, &tmp); + n_this_time = gen5_get_rectangles(sna, &tmp, n); + } + n -= n_this_time; + do { + DBG((" (%d, %d), (%d, %d)\n", + box->x1, box->y1, box->x2, box->y2)); + OUT_VERTEX(box->x2, box->y2); + OUT_VERTEX_F(1); + OUT_VERTEX_F(1); + + OUT_VERTEX(box->x1, box->y2); + OUT_VERTEX_F(0); + OUT_VERTEX_F(1); + + OUT_VERTEX(box->x1, box->y1); + OUT_VERTEX_F(0); + OUT_VERTEX_F(0); + + box++; + } while (--n_this_time); + } while (n); + + gen5_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + return TRUE; +} + +static void +gen5_render_fill_blt(struct sna *sna, + const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h) +{ + DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x,y,w,h)); + + if (!gen5_get_rectangles(sna, &op->base, 1)) { + gen5_fill_bind_surfaces(sna, &op->base); + gen5_get_rectangles(sna, &op->base, 1); + } + + OUT_VERTEX(x+w, y+h); + OUT_VERTEX_F(1); + OUT_VERTEX_F(1); + + OUT_VERTEX(x, y+h); + OUT_VERTEX_F(0); + OUT_VERTEX_F(1); + + OUT_VERTEX(x, y); + OUT_VERTEX_F(0); + OUT_VERTEX_F(0); +} + +static void +gen5_render_fill_done(struct sna *sna, + const struct sna_fill_op *op) +{ + gen5_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, op->base.src.bo); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + + DBG(("%s()\n", __FUNCTION__)); 
+} + +static Bool +gen5_render_fill(struct sna *sna, uint8_t alu, + PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + struct sna_fill_op *op) +{ + DBG(("%s(alu=%d, color=%08x)\n", __FUNCTION__, alu, color)); + + if (sna->kgem.mode == KGEM_BLT && + sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + op)) + return TRUE; + + if (!(alu == GXcopy || alu == GXclear) || + dst->drawable.width > 8192 || dst->drawable.height > 8192) + return sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + op); + + if (alu == GXclear) + color = 0; + + op->base.op = color == 0 ? PictOpClear : PictOpSrc; + + op->base.dst.pixmap = dst; + op->base.dst.width = dst->drawable.width; + op->base.dst.height = dst->drawable.height; + op->base.dst.format = sna_format_for_depth(dst->drawable.depth); + op->base.dst.bo = dst_bo; + + op->base.src.bo = + sna_render_get_solid(sna, + sna_rgba_for_color(color, + dst->drawable.depth)); + op->base.src.filter = SAMPLER_FILTER_NEAREST; + op->base.src.repeat = SAMPLER_EXTEND_REPEAT; + + op->base.is_affine = TRUE; + op->base.floats_per_vertex = 3; + op->base.u.gen5.wm_kernel = WM_KERNEL; + op->base.u.gen5.ve_id = 1; + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + + gen5_fill_bind_surfaces(sna, &op->base); + gen5_align_vertex(sna, &op->base); + + op->blt = gen5_render_fill_blt; + op->done = gen5_render_fill_done; + return TRUE; +} + +static void +gen5_render_flush(struct sna *sna) +{ + gen5_vertex_finish(sna, TRUE); +} + +static void +gen5_render_context_switch(struct sna *sna, + int new_mode) +{ + if (sna->kgem.mode == 0) + return; + + /* Ironlake has a limitation that a 3D or Media command can't + * be the first command after a BLT, unless it's + * non-pipelined. Instead of trying to track it and emit a + * command at the right time, we just emit a dummy + * non-pipelined 3D instruction after each blit. 
+ */ + if (new_mode == KGEM_BLT) { +#if 0 + OUT_BATCH(MI_FLUSH | + MI_STATE_INSTRUCTION_CACHE_FLUSH | + MI_INHIBIT_RENDER_CACHE_FLUSH); +#endif + } else { + OUT_BATCH(CMD_POLY_STIPPLE_OFFSET << 16); + OUT_BATCH(0); + } +} + +static void gen5_render_reset(struct sna *sna) +{ + sna->render_state.gen5.needs_invariant = TRUE; + sna->render_state.gen5.vb_id = 0; + sna->render_state.gen5.ve_id = -1; + sna->render_state.gen5.last_primitive = -1; + sna->render_state.gen5.last_pipelined_pointers = 0; + + sna->render_state.gen5.drawrect_offset = -1; + sna->render_state.gen5.drawrect_limit = -1; + sna->render_state.gen5.surface_table = -1; +} + +static void gen5_render_fini(struct sna *sna) +{ + kgem_bo_destroy(&sna->kgem, sna->render_state.gen5.general_bo); +} + +static uint32_t gen5_create_vs_unit_state(struct sna_static_stream *stream) +{ + struct gen5_vs_unit_state *vs = sna_static_stream_map(stream, sizeof(*vs), 32); + + /* Set up the vertex shader to be disabled (passthrough) */ + vs->thread4.nr_urb_entries = URB_VS_ENTRIES >> 2; + vs->thread4.urb_entry_allocation_size = URB_VS_ENTRY_SIZE - 1; + vs->vs6.vs_enable = 0; + vs->vs6.vert_cache_disable = 1; + + return sna_static_stream_offsetof(stream, vs); +} + +static uint32_t gen5_create_sf_state(struct sna_static_stream *stream, + uint32_t kernel) +{ + struct gen5_sf_unit_state *sf_state; + + sf_state = sna_static_stream_map(stream, sizeof(*sf_state), 32); + + sf_state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(SF_KERNEL_NUM_GRF); + sf_state->thread0.kernel_start_pointer = kernel >> 6; + sf_state->sf1.single_program_flow = 1; + /* scratch space is not used in our kernel */ + sf_state->thread2.scratch_space_base_pointer = 0; + sf_state->thread3.const_urb_entry_read_length = 0; /* no const URBs */ + sf_state->thread3.const_urb_entry_read_offset = 0; /* no const URBs */ + sf_state->thread3.urb_entry_read_length = 1; /* 1 URB per vertex */ + /* don't smash vertex header, read start from dw8 */ + 
sf_state->thread3.urb_entry_read_offset = 1; + sf_state->thread3.dispatch_grf_start_reg = 3; + sf_state->thread4.max_threads = SF_MAX_THREADS - 1; + sf_state->thread4.urb_entry_allocation_size = URB_SF_ENTRY_SIZE - 1; + sf_state->thread4.nr_urb_entries = URB_SF_ENTRIES; + sf_state->thread4.stats_enable = 1; + sf_state->sf5.viewport_transform = FALSE; /* skip viewport */ + sf_state->sf6.cull_mode = GEN5_CULLMODE_NONE; + sf_state->sf6.scissor = 0; + sf_state->sf7.trifan_pv = 2; + sf_state->sf6.dest_org_vbias = 0x8; + sf_state->sf6.dest_org_hbias = 0x8; + + return sna_static_stream_offsetof(stream, sf_state); +} + +static uint32_t gen5_create_sampler_state(struct sna_static_stream *stream, + sampler_filter_t src_filter, + sampler_extend_t src_extend, + sampler_filter_t mask_filter, + sampler_extend_t mask_extend) +{ + struct gen5_sampler_state *sampler_state; + + sampler_state = sna_static_stream_map(stream, + sizeof(struct gen5_sampler_state) * 2, + 32); + sampler_state_init(&sampler_state[0], src_filter, src_extend); + sampler_state_init(&sampler_state[1], mask_filter, mask_extend); + + return sna_static_stream_offsetof(stream, sampler_state); +} + +static void gen5_init_wm_state(struct gen5_wm_unit_state *state, + Bool has_mask, + uint32_t kernel, + uint32_t sampler) +{ + state->thread0.grf_reg_count = GEN5_GRF_BLOCKS(PS_KERNEL_NUM_GRF); + state->thread0.kernel_start_pointer = kernel >> 6; + + state->thread1.single_program_flow = 0; + + /* scratch space is not used in our kernel */ + state->thread2.scratch_space_base_pointer = 0; + state->thread2.per_thread_scratch_space = 0; + + state->thread3.const_urb_entry_read_length = 0; + state->thread3.const_urb_entry_read_offset = 0; + + state->thread3.urb_entry_read_offset = 0; + /* wm kernel use urb from 3, see wm_program in compiler module */ + state->thread3.dispatch_grf_start_reg = 3; /* must match kernel */ + + state->wm4.sampler_count = 0; /* hardware requirement */ + + state->wm4.sampler_state_pointer = sampler >> 
5; + state->wm5.max_threads = PS_MAX_THREADS - 1; + state->wm5.transposed_urb_read = 0; + state->wm5.thread_dispatch_enable = 1; + /* just use 16-pixel dispatch (4 subspans), don't need to change kernel + * start point + */ + state->wm5.enable_16_pix = 1; + state->wm5.enable_8_pix = 0; + state->wm5.early_depth_test = 1; + + /* Each pair of attributes (src/mask coords) is two URB entries */ + if (has_mask) { + state->thread1.binding_table_entry_count = 3; /* 2 tex and fb */ + state->thread3.urb_entry_read_length = 4; + } else { + state->thread1.binding_table_entry_count = 2; /* 1 tex and fb */ + state->thread3.urb_entry_read_length = 2; + } + + /* binding table entry count is only used for prefetching, + * and it has to be set 0 for Ironlake + */ + state->thread1.binding_table_entry_count = 0; +} + +static uint32_t gen5_create_cc_viewport(struct sna_static_stream *stream) +{ + struct gen5_cc_viewport vp; + + vp.min_depth = -1.e35; + vp.max_depth = 1.e35; + + return sna_static_stream_add(stream, &vp, sizeof(vp), 32); +} + +static uint32_t gen5_create_cc_unit_state(struct sna_static_stream *stream) +{ + uint8_t *ptr, *base; + uint32_t vp; + int i, j; + + vp = gen5_create_cc_viewport(stream); + base = ptr = + sna_static_stream_map(stream, + GEN5_BLENDFACTOR_COUNT*GEN5_BLENDFACTOR_COUNT*64, + 64); + + for (i = 0; i < GEN5_BLENDFACTOR_COUNT; i++) { + for (j = 0; j < GEN5_BLENDFACTOR_COUNT; j++) { + struct gen5_cc_unit_state *state = + (struct gen5_cc_unit_state *)ptr; + + state->cc3.blend_enable = 1; /* enable color blend */ + state->cc4.cc_viewport_state_offset = vp >> 5; + + state->cc5.logicop_func = 0xc; /* COPY */ + state->cc5.ia_blend_function = GEN5_BLENDFUNCTION_ADD; + + /* Fill in alpha blend factors same as color, for the future. 
*/ + state->cc5.ia_src_blend_factor = i; + state->cc5.ia_dest_blend_factor = j; + + state->cc6.blend_function = GEN5_BLENDFUNCTION_ADD; + state->cc6.clamp_post_alpha_blend = 1; + state->cc6.clamp_pre_alpha_blend = 1; + state->cc6.src_blend_factor = i; + state->cc6.dest_blend_factor = j; + + ptr += 64; + } + } + + return sna_static_stream_offsetof(stream, base); +} + +static Bool gen5_render_setup(struct sna *sna) +{ + struct gen5_render_state *state = &sna->render_state.gen5; + struct sna_static_stream general; + struct gen5_wm_unit_state_padded *wm_state; + uint32_t sf[2], wm[KERNEL_COUNT]; + int i, j, k, l, m; + + sna_static_stream_init(&general); + + /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer + * dumps, you know it points to zero. + */ + null_create(&general); + + /* Set up the two SF states (one for blending with a mask, one without) */ + sf[0] = sna_static_stream_add(&general, + sf_kernel, + sizeof(sf_kernel), + 64); + sf[1] = sna_static_stream_add(&general, + sf_kernel_mask, + sizeof(sf_kernel_mask), + 64); + for (m = 0; m < KERNEL_COUNT; m++) { + wm[m] = sna_static_stream_add(&general, + wm_kernels[m].data, + wm_kernels[m].size, + 64); + } + + state->vs = gen5_create_vs_unit_state(&general); + + state->sf[0] = gen5_create_sf_state(&general, sf[0]); + state->sf[1] = gen5_create_sf_state(&general, sf[1]); + + + /* Set up the WM states: each filter/extend type for source and mask, per + * kernel. 
+ */ + wm_state = sna_static_stream_map(&general, + sizeof(*wm_state) * KERNEL_COUNT * + FILTER_COUNT * EXTEND_COUNT * + FILTER_COUNT * EXTEND_COUNT, + 64); + state->wm = sna_static_stream_offsetof(&general, wm_state); + for (i = 0; i < FILTER_COUNT; i++) { + for (j = 0; j < EXTEND_COUNT; j++) { + for (k = 0; k < FILTER_COUNT; k++) { + for (l = 0; l < EXTEND_COUNT; l++) { + uint32_t sampler_state; + + sampler_state = + gen5_create_sampler_state(&general, + i, j, + k, l); + + for (m = 0; m < KERNEL_COUNT; m++) { + gen5_init_wm_state(&wm_state->state, + wm_kernels[m].has_mask, + wm[m], + sampler_state); + wm_state++; + } + } + } + } + } + + state->cc = gen5_create_cc_unit_state(&general); + + state->general_bo = sna_static_stream_fini(sna, &general); + return state->general_bo != NULL; +} + +Bool gen5_render_init(struct sna *sna) +{ + if (!gen5_render_setup(sna)) + return FALSE; + + gen5_render_reset(sna); + + sna->render.composite = gen5_render_composite; + sna->render.video = gen5_render_video; + + sna->render.copy_boxes = gen5_render_copy_boxes; + sna->render.copy = gen5_render_copy; + + sna->render.fill_boxes = gen5_render_fill_boxes; + sna->render.fill = gen5_render_fill; + + sna->render.flush = gen5_render_flush; + sna->render.context_switch = gen5_render_context_switch; + sna->render.reset = gen5_render_reset; + sna->render.fini = gen5_render_fini; + + sna->render.max_3d_size = 8192; + return TRUE; +} diff --git a/src/sna/gen5_render.h b/src/sna/gen5_render.h new file mode 100644 index 00000000..190580ea --- /dev/null +++ b/src/sna/gen5_render.h @@ -0,0 +1,2730 @@ +/************************************************************************** + * + * Copyright 2003 Tungsten Graphics, Inc., Cedar Park, Texas. + * All Rights Reserved. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation the rights to use, copy, modify, merge, publish, + * distribute, sub license, and/or sell copies of the Software, and to + * permit persons to whom the Software is furnished to do so, subject to + * the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial portions + * of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR + * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, + * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE + * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ * + **************************************************************************/ + +#ifndef GEN5_RENDER_H +#define GEN5_RENDER_H + +#define GEN5_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \ + ((Pipeline) << 27) | \ + ((Opcode) << 24) | \ + ((Subopcode) << 16)) + +#define GEN5_URB_FENCE GEN5_3D(0, 0, 0) +#define GEN5_CS_URB_STATE GEN5_3D(0, 0, 1) +#define GEN5_CONSTANT_BUFFER GEN5_3D(0, 0, 2) +#define GEN5_STATE_PREFETCH GEN5_3D(0, 0, 3) + +#define GEN5_STATE_BASE_ADDRESS GEN5_3D(0, 1, 1) +#define GEN5_STATE_SIP GEN5_3D(0, 1, 2) + +#define GEN5_PIPELINE_SELECT GEN5_3D(1, 1, 4) + +#define GEN5_MEDIA_STATE_POINTERS GEN5_3D(2, 0, 0) +#define GEN5_MEDIA_OBJECT GEN5_3D(2, 1, 0) + +#define GEN5_3DSTATE_PIPELINED_POINTERS GEN5_3D(3, 0, 0) +#define GEN5_3DSTATE_BINDING_TABLE_POINTERS GEN5_3D(3, 0, 1) +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */ +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */ +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */ + +#define GEN5_3DSTATE_VERTEX_BUFFERS GEN5_3D(3, 0, 8) +#define GEN5_3DSTATE_VERTEX_ELEMENTS GEN5_3D(3, 0, 9) +#define GEN5_3DSTATE_INDEX_BUFFER GEN5_3D(3, 0, 0xa) +#define GEN5_3DSTATE_VF_STATISTICS GEN5_3D(3, 0, 0xb) + +#define GEN5_3DSTATE_DRAWING_RECTANGLE GEN5_3D(3, 1, 0) +#define GEN5_3DSTATE_CONSTANT_COLOR GEN5_3D(3, 1, 1) +#define GEN5_3DSTATE_SAMPLER_PALETTE_LOAD GEN5_3D(3, 1, 2) +#define GEN5_3DSTATE_CHROMA_KEY GEN5_3D(3, 1, 4) +#define GEN5_3DSTATE_DEPTH_BUFFER GEN5_3D(3, 1, 5) +# define GEN5_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29 +# define GEN5_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18 + +#define GEN5_3DSTATE_POLY_STIPPLE_OFFSET GEN5_3D(3, 1, 6) +#define GEN5_3DSTATE_POLY_STIPPLE_PATTERN GEN5_3D(3, 1, 7) +#define GEN5_3DSTATE_LINE_STIPPLE GEN5_3D(3, 1, 8) +#define GEN5_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN5_3D(3, 1, 9) +/* These two are BLC and CTG only, not BW or CL */ +#define GEN5_3DSTATE_AA_LINE_PARAMS GEN5_3D(3, 1, 0xa) +#define GEN5_3DSTATE_GS_SVB_INDEX 
GEN5_3D(3, 1, 0xb) + +#define GEN5_PIPE_CONTROL GEN5_3D(3, 2, 0) + +#define GEN5_3DPRIMITIVE GEN5_3D(3, 3, 0) + +#define GEN5_3DSTATE_CLEAR_PARAMS GEN5_3D(3, 1, 0x10) +/* DW1 */ +# define GEN5_3DSTATE_DEPTH_CLEAR_VALID (1 << 15) + +/* for GEN6+ */ +#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN5_3D(3, 0, 0x02) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8) + +#define GEN6_3DSTATE_URB GEN5_3D(3, 0, 0x05) +/* DW1 */ +# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16 +# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0 +/* DW2 */ +# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8 +# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0 + +#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN5_3D(3, 0, 0x0d) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10) + +#define GEN6_3DSTATE_CC_STATE_POINTERS GEN5_3D(3, 0, 0x0e) + +#define GEN6_3DSTATE_VS GEN5_3D(3, 0, 0x10) + +#define GEN6_3DSTATE_GS GEN5_3D(3, 0, 0x11) +/* DW4 */ +# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0 + +#define GEN6_3DSTATE_CLIP GEN5_3D(3, 0, 0x12) + +#define GEN6_3DSTATE_SF GEN5_3D(3, 0, 0x13) +/* DW1 */ +# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +/* DW3 */ +# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29) +# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29) +# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29) +# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29) +/* DW4 */ +# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25 + + +#define GEN6_3DSTATE_WM GEN5_3D(3, 0, 0x14) +/* DW2 */ +# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27 +# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* 
DW4 */ +# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16 +/* DW5 */ +# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25 +# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) + + +#define GEN6_3DSTATE_CONSTANT_VS GEN5_3D(3, 0, 0x15) +#define GEN6_3DSTATE_CONSTANT_GS GEN5_3D(3, 0, 0x16) +#define GEN6_3DSTATE_CONSTANT_PS GEN5_3D(3, 0, 0x17) + +#define GEN6_3DSTATE_SAMPLE_MASK GEN5_3D(3, 0, 0x18) + +#define GEN6_3DSTATE_MULTISAMPLE GEN5_3D(3, 1, 0x0d) +/* DW1 */ +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1) + +#define PIPELINE_SELECT_3D 0 +#define PIPELINE_SELECT_MEDIA 1 + +#define UF0_CS_REALLOC (1 << 13) +#define UF0_VFE_REALLOC (1 << 12) +#define UF0_SF_REALLOC (1 << 11) +#define UF0_CLIP_REALLOC (1 << 10) +#define UF0_GS_REALLOC (1 << 9) +#define UF0_VS_REALLOC (1 << 8) +#define UF1_CLIP_FENCE_SHIFT 20 +#define UF1_GS_FENCE_SHIFT 10 +#define UF1_VS_FENCE_SHIFT 0 +#define UF2_CS_FENCE_SHIFT 20 +#define UF2_VFE_FENCE_SHIFT 10 +#define UF2_SF_FENCE_SHIFT 0 + +/* for GEN5_STATE_BASE_ADDRESS */ +#define BASE_ADDRESS_MODIFY (1 << 0) + +/* for GEN5_3DSTATE_PIPELINED_POINTERS */ +#define GEN5_GS_DISABLE 0 +#define GEN5_GS_ENABLE 1 +#define 
GEN5_CLIP_DISABLE 0 +#define GEN5_CLIP_ENABLE 1 + +/* for GEN5_PIPE_CONTROL */ +#define GEN5_PIPE_CONTROL_NOWRITE (0 << 14) +#define GEN5_PIPE_CONTROL_WRITE_QWORD (1 << 14) +#define GEN5_PIPE_CONTROL_WRITE_DEPTH (2 << 14) +#define GEN5_PIPE_CONTROL_WRITE_TIME (3 << 14) +#define GEN5_PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define GEN5_PIPE_CONTROL_WC_FLUSH (1 << 12) +#define GEN5_PIPE_CONTROL_IS_FLUSH (1 << 11) +#define GEN5_PIPE_CONTROL_TC_FLUSH (1 << 10) +#define GEN5_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define GEN5_PIPE_CONTROL_GLOBAL_GTT (1 << 2) +#define GEN5_PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define GEN5_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) + +/* VERTEX_BUFFER_STATE Structure */ +#define VB0_BUFFER_INDEX_SHIFT 27 +#define GEN6_VB0_BUFFER_INDEX_SHIFT 26 +#define VB0_VERTEXDATA (0 << 26) +#define VB0_INSTANCEDATA (1 << 26) +#define GEN6_VB0_VERTEXDATA (0 << 20) +#define GEN6_VB0_INSTANCEDATA (1 << 20) +#define VB0_BUFFER_PITCH_SHIFT 0 + +/* VERTEX_ELEMENT_STATE Structure */ +#define VE0_VERTEX_BUFFER_INDEX_SHIFT 27 +#define GEN6_VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */ +#define VE0_VALID (1 << 26) +#define GEN6_VE0_VALID (1 << 25) /* for GEN6 */ +#define VE0_FORMAT_SHIFT 16 +#define VE0_OFFSET_SHIFT 0 +#define VE1_VFCOMPONENT_0_SHIFT 28 +#define VE1_VFCOMPONENT_1_SHIFT 24 +#define VE1_VFCOMPONENT_2_SHIFT 20 +#define VE1_VFCOMPONENT_3_SHIFT 16 +#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0 + +/* 3DPRIMITIVE bits */ +#define GEN5_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15) +#define GEN5_3DPRIMITIVE_VERTEX_RANDOM (1 << 15) +/* Primitive types are in gen5_defines.h */ +#define GEN5_3DPRIMITIVE_TOPOLOGY_SHIFT 10 + +#define GEN5_SVG_CTL 0x7400 + +#define GEN5_SVG_CTL_GS_BA (0 << 8) +#define GEN5_SVG_CTL_SS_BA (1 << 8) +#define GEN5_SVG_CTL_IO_BA (2 << 8) +#define GEN5_SVG_CTL_GS_AUB (3 << 8) +#define GEN5_SVG_CTL_IO_AUB (4 << 8) +#define GEN5_SVG_CTL_SIP (5 << 8) + +#define GEN5_SVG_RDATA 0x7404 +#define GEN5_SVG_WORK_CTL 0x7408 + +#define GEN5_VF_CTL 0x7500 + 
+#define GEN5_VF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN5_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8) +#define GEN5_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8) +#define GEN5_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4) +#define GEN5_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4) +#define GEN5_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3) +#define GEN5_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2) +#define GEN5_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1) +#define GEN5_VF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN5_VF_STRG_VAL 0x7504 +#define GEN5_VF_STR_VL_OVR 0x7508 +#define GEN5_VF_VC_OVR 0x750c +#define GEN5_VF_STR_PSKIP 0x7510 +#define GEN5_VF_MAX_PRIM 0x7514 +#define GEN5_VF_RDATA 0x7518 + +#define GEN5_VS_CTL 0x7600 +#define GEN5_VS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN5_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8) +#define GEN5_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8) +#define GEN5_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8) +#define GEN5_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8) +#define GEN5_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN5_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN5_VS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN5_VS_STRG_VAL 0x7604 +#define GEN5_VS_RDATA 0x7608 + +#define GEN5_SF_CTL 0x7b00 +#define GEN5_SF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8) +#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8) +#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8) +#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8) +#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8) +#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8) +#define GEN5_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8) +#define GEN5_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8) +#define GEN5_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4) +#define GEN5_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3) +#define GEN5_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN5_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define 
GEN5_SF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN5_SF_STRG_VAL 0x7b04 +#define GEN5_SF_RDATA 0x7b18 + +#define GEN5_WIZ_CTL 0x7c00 +#define GEN5_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN5_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16 +#define GEN5_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8) +#define GEN5_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8) +#define GEN5_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8) +#define GEN5_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6) +#define GEN5_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5) +#define GEN5_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4) +#define GEN5_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3) +#define GEN5_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN5_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN5_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN5_WIZ_STRG_VAL 0x7c04 +#define GEN5_WIZ_RDATA 0x7c18 + +#define GEN5_TS_CTL 0x7e00 +#define GEN5_TS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN5_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8) +#define GEN5_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8) +#define GEN5_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2) +#define GEN5_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1) +#define GEN5_TS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN5_TS_STRG_VAL 0x7e04 +#define GEN5_TS_RDATA 0x7e08 + +#define GEN5_TD_CTL 0x8000 +#define GEN5_TD_CTL_MUX_SHIFT 8 +#define GEN5_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7) +#define GEN5_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6) +#define GEN5_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5) +#define GEN5_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4) +#define GEN5_TD_CTL_BREAKPOINT_ENABLE (1 << 2) +#define GEN5_TD_CTL2 0x8004 +#define GEN5_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28) +#define GEN5_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26) +#define GEN5_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25) +#define GEN5_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16 +#define GEN5_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8) +#define 
GEN5_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7) +#define GEN5_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6) +#define GEN5_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5) +#define GEN5_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4) +#define GEN5_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3) +#define GEN5_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0) +#define GEN5_TD_VF_VS_EMSK 0x8008 +#define GEN5_TD_GS_EMSK 0x800c +#define GEN5_TD_CLIP_EMSK 0x8010 +#define GEN5_TD_SF_EMSK 0x8014 +#define GEN5_TD_WIZ_EMSK 0x8018 +#define GEN5_TD_0_6_EHTRG_VAL 0x801c +#define GEN5_TD_0_7_EHTRG_VAL 0x8020 +#define GEN5_TD_0_6_EHTRG_MSK 0x8024 +#define GEN5_TD_0_7_EHTRG_MSK 0x8028 +#define GEN5_TD_RDATA 0x802c +#define GEN5_TD_TS_EMSK 0x8030 + +#define GEN5_EU_CTL 0x8800 +#define GEN5_EU_CTL_SELECT_SHIFT 16 +#define GEN5_EU_CTL_DATA_MUX_SHIFT 8 +#define GEN5_EU_ATT_0 0x8810 +#define GEN5_EU_ATT_1 0x8814 +#define GEN5_EU_ATT_DATA_0 0x8820 +#define GEN5_EU_ATT_DATA_1 0x8824 +#define GEN5_EU_ATT_CLR_0 0x8830 +#define GEN5_EU_ATT_CLR_1 0x8834 +#define GEN5_EU_RDATA 0x8840 + +/* 3D state: + */ +#define _3DOP_3DSTATE_PIPELINED 0x0 +#define _3DOP_3DSTATE_NONPIPELINED 0x1 +#define _3DOP_3DCONTROL 0x2 +#define _3DOP_3DPRIMITIVE 0x3 + +#define _3DSTATE_PIPELINED_POINTERS 0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 +#define _3DSTATE_VERTEX_BUFFERS 0x08 +#define _3DSTATE_VERTEX_ELEMENTS 0x09 +#define _3DSTATE_INDEX_BUFFER 0x0A +#define _3DSTATE_VF_STATISTICS 0x0B +#define _3DSTATE_DRAWING_RECTANGLE 0x00 +#define _3DSTATE_CONSTANT_COLOR 0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 +#define _3DSTATE_CHROMA_KEY 0x04 +#define _3DSTATE_DEPTH_BUFFER 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 +#define _3DSTATE_LINE_STIPPLE 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 +#define _3DCONTROL 0x00 +#define _3DPRIMITIVE 0x00 + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 
+#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 +#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 + +#define GEN5_ANISORATIO_2 0 +#define GEN5_ANISORATIO_4 1 +#define GEN5_ANISORATIO_6 2 +#define GEN5_ANISORATIO_8 3 +#define GEN5_ANISORATIO_10 4 +#define GEN5_ANISORATIO_12 5 +#define GEN5_ANISORATIO_14 6 +#define GEN5_ANISORATIO_16 7 + +#define GEN5_BLENDFACTOR_ONE 0x1 +#define GEN5_BLENDFACTOR_SRC_COLOR 0x2 +#define GEN5_BLENDFACTOR_SRC_ALPHA 0x3 +#define GEN5_BLENDFACTOR_DST_ALPHA 0x4 +#define GEN5_BLENDFACTOR_DST_COLOR 0x5 +#define GEN5_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define GEN5_BLENDFACTOR_CONST_COLOR 0x7 +#define GEN5_BLENDFACTOR_CONST_ALPHA 0x8 +#define GEN5_BLENDFACTOR_SRC1_COLOR 0x9 +#define GEN5_BLENDFACTOR_SRC1_ALPHA 0x0A +#define GEN5_BLENDFACTOR_ZERO 0x11 +#define GEN5_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define GEN5_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define GEN5_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define GEN5_BLENDFACTOR_INV_DST_COLOR 0x15 +#define GEN5_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define GEN5_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define GEN5_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define GEN5_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define GEN5_BLENDFUNCTION_ADD 0 +#define GEN5_BLENDFUNCTION_SUBTRACT 1 +#define GEN5_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define GEN5_BLENDFUNCTION_MIN 3 +#define GEN5_BLENDFUNCTION_MAX 4 + +#define GEN5_ALPHATEST_FORMAT_UNORM8 0 +#define 
GEN5_ALPHATEST_FORMAT_FLOAT32 1 + +#define GEN5_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define GEN5_CHROMAKEY_REPLACE_BLACK 1 + +#define GEN5_CLIP_API_OGL 0 +#define GEN5_CLIP_API_DX 1 + +#define GEN5_CLIPMODE_NORMAL 0 +#define GEN5_CLIPMODE_CLIP_ALL 1 +#define GEN5_CLIPMODE_CLIP_NON_REJECTED 2 +#define GEN5_CLIPMODE_REJECT_ALL 3 +#define GEN5_CLIPMODE_ACCEPT_ALL 4 + +#define GEN5_CLIP_NDCSPACE 0 +#define GEN5_CLIP_SCREENSPACE 1 + +#define GEN5_COMPAREFUNCTION_ALWAYS 0 +#define GEN5_COMPAREFUNCTION_NEVER 1 +#define GEN5_COMPAREFUNCTION_LESS 2 +#define GEN5_COMPAREFUNCTION_EQUAL 3 +#define GEN5_COMPAREFUNCTION_LEQUAL 4 +#define GEN5_COMPAREFUNCTION_GREATER 5 +#define GEN5_COMPAREFUNCTION_NOTEQUAL 6 +#define GEN5_COMPAREFUNCTION_GEQUAL 7 + +#define GEN5_COVERAGE_PIXELS_HALF 0 +#define GEN5_COVERAGE_PIXELS_1 1 +#define GEN5_COVERAGE_PIXELS_2 2 +#define GEN5_COVERAGE_PIXELS_4 3 + +#define GEN5_CULLMODE_BOTH 0 +#define GEN5_CULLMODE_NONE 1 +#define GEN5_CULLMODE_FRONT 2 +#define GEN5_CULLMODE_BACK 3 + +#define GEN5_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define GEN5_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define GEN5_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define GEN5_DEPTHFORMAT_D32_FLOAT 1 +#define GEN5_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define GEN5_DEPTHFORMAT_D16_UNORM 5 + +#define GEN5_FLOATING_POINT_IEEE_754 0 +#define GEN5_FLOATING_POINT_NON_IEEE_754 1 + +#define GEN5_FRONTWINDING_CW 0 +#define GEN5_FRONTWINDING_CCW 1 + +#define GEN5_INDEX_BYTE 0 +#define GEN5_INDEX_WORD 1 +#define GEN5_INDEX_DWORD 2 + +#define GEN5_LOGICOPFUNCTION_CLEAR 0 +#define GEN5_LOGICOPFUNCTION_NOR 1 +#define GEN5_LOGICOPFUNCTION_AND_INVERTED 2 +#define GEN5_LOGICOPFUNCTION_COPY_INVERTED 3 +#define GEN5_LOGICOPFUNCTION_AND_REVERSE 4 +#define GEN5_LOGICOPFUNCTION_INVERT 5 +#define GEN5_LOGICOPFUNCTION_XOR 6 +#define GEN5_LOGICOPFUNCTION_NAND 7 +#define GEN5_LOGICOPFUNCTION_AND 8 +#define GEN5_LOGICOPFUNCTION_EQUIV 9 +#define GEN5_LOGICOPFUNCTION_NOOP 10 +#define GEN5_LOGICOPFUNCTION_OR_INVERTED 11 +#define 
GEN5_LOGICOPFUNCTION_COPY 12 +#define GEN5_LOGICOPFUNCTION_OR_REVERSE 13 +#define GEN5_LOGICOPFUNCTION_OR 14 +#define GEN5_LOGICOPFUNCTION_SET 15 + +#define GEN5_MAPFILTER_NEAREST 0x0 +#define GEN5_MAPFILTER_LINEAR 0x1 +#define GEN5_MAPFILTER_ANISOTROPIC 0x2 + +#define GEN5_MIPFILTER_NONE 0 +#define GEN5_MIPFILTER_NEAREST 1 +#define GEN5_MIPFILTER_LINEAR 3 + +#define GEN5_POLYGON_FRONT_FACING 0 +#define GEN5_POLYGON_BACK_FACING 1 + +#define GEN5_PREFILTER_ALWAYS 0x0 +#define GEN5_PREFILTER_NEVER 0x1 +#define GEN5_PREFILTER_LESS 0x2 +#define GEN5_PREFILTER_EQUAL 0x3 +#define GEN5_PREFILTER_LEQUAL 0x4 +#define GEN5_PREFILTER_GREATER 0x5 +#define GEN5_PREFILTER_NOTEQUAL 0x6 +#define GEN5_PREFILTER_GEQUAL 0x7 + +#define GEN5_PROVOKING_VERTEX_0 0 +#define GEN5_PROVOKING_VERTEX_1 1 +#define GEN5_PROVOKING_VERTEX_2 2 + +#define GEN5_RASTRULE_UPPER_LEFT 0 +#define GEN5_RASTRULE_UPPER_RIGHT 1 + +#define GEN5_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define GEN5_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define GEN5_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define GEN5_STENCILOP_KEEP 0 +#define GEN5_STENCILOP_ZERO 1 +#define GEN5_STENCILOP_REPLACE 2 +#define GEN5_STENCILOP_INCRSAT 3 +#define GEN5_STENCILOP_DECRSAT 4 +#define GEN5_STENCILOP_INCR 5 +#define GEN5_STENCILOP_DECR 6 +#define GEN5_STENCILOP_INVERT 7 + +#define GEN5_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define GEN5_SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define GEN5_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define GEN5_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define GEN5_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define GEN5_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define GEN5_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define GEN5_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define GEN5_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define GEN5_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define GEN5_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define GEN5_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define 
GEN5_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define GEN5_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define GEN5_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define GEN5_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define GEN5_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define GEN5_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define GEN5_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define GEN5_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define GEN5_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define GEN5_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define GEN5_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define GEN5_SURFACEFORMAT_R32G32_SINT 0x086 +#define GEN5_SURFACEFORMAT_R32G32_UINT 0x087 +#define GEN5_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define GEN5_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define GEN5_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define GEN5_SURFACEFORMAT_R32G32_UNORM 0x08B +#define GEN5_SURFACEFORMAT_R32G32_SNORM 0x08C +#define GEN5_SURFACEFORMAT_R64_FLOAT 0x08D +#define GEN5_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define GEN5_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define GEN5_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define GEN5_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define GEN5_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define GEN5_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define GEN5_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define GEN5_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define GEN5_SURFACEFORMAT_R32G32_USCALED 0x096 +#define GEN5_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define GEN5_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define GEN5_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define GEN5_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define GEN5_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define GEN5_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define GEN5_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define GEN5_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define GEN5_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define GEN5_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define GEN5_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define 
GEN5_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define GEN5_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define GEN5_SURFACEFORMAT_R16G16_SINT 0x0CE +#define GEN5_SURFACEFORMAT_R16G16_UINT 0x0CF +#define GEN5_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define GEN5_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define GEN5_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define GEN5_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define GEN5_SURFACEFORMAT_R32_SINT 0x0D6 +#define GEN5_SURFACEFORMAT_R32_UINT 0x0D7 +#define GEN5_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define GEN5_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define GEN5_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define GEN5_SURFACEFORMAT_L16A16_UNORM 0x0DF +#define GEN5_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define GEN5_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define GEN5_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define GEN5_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define GEN5_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define GEN5_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define GEN5_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define GEN5_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define GEN5_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define GEN5_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define GEN5_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define GEN5_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define GEN5_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define GEN5_SURFACEFORMAT_R32_UNORM 0x0F1 +#define GEN5_SURFACEFORMAT_R32_SNORM 0x0F2 +#define GEN5_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define GEN5_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define GEN5_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define GEN5_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define GEN5_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define GEN5_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define GEN5_SURFACEFORMAT_R32_USCALED 0x0F9 +#define GEN5_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define GEN5_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define GEN5_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define GEN5_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define GEN5_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define 
GEN5_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define GEN5_SURFACEFORMAT_R8G8_UNORM 0x106 +#define GEN5_SURFACEFORMAT_R8G8_SNORM 0x107 +#define GEN5_SURFACEFORMAT_R8G8_SINT 0x108 +#define GEN5_SURFACEFORMAT_R8G8_UINT 0x109 +#define GEN5_SURFACEFORMAT_R16_UNORM 0x10A +#define GEN5_SURFACEFORMAT_R16_SNORM 0x10B +#define GEN5_SURFACEFORMAT_R16_SINT 0x10C +#define GEN5_SURFACEFORMAT_R16_UINT 0x10D +#define GEN5_SURFACEFORMAT_R16_FLOAT 0x10E +#define GEN5_SURFACEFORMAT_I16_UNORM 0x111 +#define GEN5_SURFACEFORMAT_L16_UNORM 0x112 +#define GEN5_SURFACEFORMAT_A16_UNORM 0x113 +#define GEN5_SURFACEFORMAT_L8A8_UNORM 0x114 +#define GEN5_SURFACEFORMAT_I16_FLOAT 0x115 +#define GEN5_SURFACEFORMAT_L16_FLOAT 0x116 +#define GEN5_SURFACEFORMAT_A16_FLOAT 0x117 +#define GEN5_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define GEN5_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define GEN5_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define GEN5_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define GEN5_SURFACEFORMAT_R8G8_USCALED 0x11D +#define GEN5_SURFACEFORMAT_R16_SSCALED 0x11E +#define GEN5_SURFACEFORMAT_R16_USCALED 0x11F +#define GEN5_SURFACEFORMAT_R8_UNORM 0x140 +#define GEN5_SURFACEFORMAT_R8_SNORM 0x141 +#define GEN5_SURFACEFORMAT_R8_SINT 0x142 +#define GEN5_SURFACEFORMAT_R8_UINT 0x143 +#define GEN5_SURFACEFORMAT_A8_UNORM 0x144 +#define GEN5_SURFACEFORMAT_I8_UNORM 0x145 +#define GEN5_SURFACEFORMAT_L8_UNORM 0x146 +#define GEN5_SURFACEFORMAT_P4A4_UNORM 0x147 +#define GEN5_SURFACEFORMAT_A4P4_UNORM 0x148 +#define GEN5_SURFACEFORMAT_R8_SSCALED 0x149 +#define GEN5_SURFACEFORMAT_R8_USCALED 0x14A +#define GEN5_SURFACEFORMAT_R1_UINT 0x181 +#define GEN5_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define GEN5_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define GEN5_SURFACEFORMAT_BC1_UNORM 0x186 +#define GEN5_SURFACEFORMAT_BC2_UNORM 0x187 +#define GEN5_SURFACEFORMAT_BC3_UNORM 0x188 +#define GEN5_SURFACEFORMAT_BC4_UNORM 0x189 +#define GEN5_SURFACEFORMAT_BC5_UNORM 0x18A +#define GEN5_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define 
GEN5_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define GEN5_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define GEN5_SURFACEFORMAT_MONO8 0x18E +#define GEN5_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define GEN5_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define GEN5_SURFACEFORMAT_DXT1_RGB 0x191 +#define GEN5_SURFACEFORMAT_FXT1 0x192 +#define GEN5_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define GEN5_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define GEN5_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define GEN5_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define GEN5_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define GEN5_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define GEN5_SURFACEFORMAT_BC4_SNORM 0x199 +#define GEN5_SURFACEFORMAT_BC5_SNORM 0x19A +#define GEN5_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define GEN5_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define GEN5_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define GEN5_SURFACEFORMAT_R16G16B16_USCALED 0x19F + +#define GEN5_SURFACERETURNFORMAT_FLOAT32 0 +#define GEN5_SURFACERETURNFORMAT_S1 1 + +#define GEN5_SURFACE_1D 0 +#define GEN5_SURFACE_2D 1 +#define GEN5_SURFACE_3D 2 +#define GEN5_SURFACE_CUBE 3 +#define GEN5_SURFACE_BUFFER 4 +#define GEN5_SURFACE_NULL 7 + +#define GEN5_BORDER_COLOR_MODE_DEFAULT 0 +#define GEN5_BORDER_COLOR_MODE_LEGACY 1 + +#define GEN5_TEXCOORDMODE_WRAP 0 +#define GEN5_TEXCOORDMODE_MIRROR 1 +#define GEN5_TEXCOORDMODE_CLAMP 2 +#define GEN5_TEXCOORDMODE_CUBE 3 +#define GEN5_TEXCOORDMODE_CLAMP_BORDER 4 +#define GEN5_TEXCOORDMODE_MIRROR_ONCE 5 + +#define GEN5_THREAD_PRIORITY_NORMAL 0 +#define GEN5_THREAD_PRIORITY_HIGH 1 + +#define GEN5_TILEWALK_XMAJOR 0 +#define GEN5_TILEWALK_YMAJOR 1 + +#define GEN5_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define GEN5_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +#define GEN5_VERTEXBUFFER_ACCESS_VERTEXDATA 0 +#define GEN5_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 + +#define GEN5_VFCOMPONENT_NOSTORE 0 +#define GEN5_VFCOMPONENT_STORE_SRC 1 +#define GEN5_VFCOMPONENT_STORE_0 2 +#define GEN5_VFCOMPONENT_STORE_1_FLT 3 +#define GEN5_VFCOMPONENT_STORE_1_INT 4 +#define 
GEN5_VFCOMPONENT_STORE_VID 5 +#define GEN5_VFCOMPONENT_STORE_IID 6 +#define GEN5_VFCOMPONENT_STORE_PID 7 + + + +/* Execution Unit (EU) defines + */ + +#define GEN5_ALIGN_1 0 +#define GEN5_ALIGN_16 1 + +#define GEN5_ADDRESS_DIRECT 0 +#define GEN5_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define GEN5_CHANNEL_X 0 +#define GEN5_CHANNEL_Y 1 +#define GEN5_CHANNEL_Z 2 +#define GEN5_CHANNEL_W 3 + +#define GEN5_COMPRESSION_NONE 0 +#define GEN5_COMPRESSION_2NDHALF 1 +#define GEN5_COMPRESSION_COMPRESSED 2 + +#define GEN5_CONDITIONAL_NONE 0 +#define GEN5_CONDITIONAL_Z 1 +#define GEN5_CONDITIONAL_NZ 2 +#define GEN5_CONDITIONAL_EQ 1 /* Z */ +#define GEN5_CONDITIONAL_NEQ 2 /* NZ */ +#define GEN5_CONDITIONAL_G 3 +#define GEN5_CONDITIONAL_GE 4 +#define GEN5_CONDITIONAL_L 5 +#define GEN5_CONDITIONAL_LE 6 +#define GEN5_CONDITIONAL_C 7 +#define GEN5_CONDITIONAL_O 8 + +#define GEN5_DEBUG_NONE 0 +#define GEN5_DEBUG_BREAKPOINT 1 + +#define GEN5_DEPENDENCY_NORMAL 0 +#define GEN5_DEPENDENCY_NOTCLEARED 1 +#define GEN5_DEPENDENCY_NOTCHECKED 2 +#define GEN5_DEPENDENCY_DISABLE 3 + +#define GEN5_EXECUTE_1 0 +#define GEN5_EXECUTE_2 1 +#define GEN5_EXECUTE_4 2 +#define GEN5_EXECUTE_8 3 +#define GEN5_EXECUTE_16 4 +#define GEN5_EXECUTE_32 5 + +#define GEN5_HORIZONTAL_STRIDE_0 0 +#define GEN5_HORIZONTAL_STRIDE_1 1 +#define GEN5_HORIZONTAL_STRIDE_2 2 +#define GEN5_HORIZONTAL_STRIDE_4 3 + +#define GEN5_INSTRUCTION_NORMAL 0 +#define GEN5_INSTRUCTION_SATURATE 1 + +#define GEN5_MASK_ENABLE 0 +#define GEN5_MASK_DISABLE 1 + +#define GEN5_OPCODE_MOV 1 +#define GEN5_OPCODE_SEL 2 +#define GEN5_OPCODE_NOT 4 +#define GEN5_OPCODE_AND 5 +#define GEN5_OPCODE_OR 6 +#define GEN5_OPCODE_XOR 7 +#define GEN5_OPCODE_SHR 8 +#define GEN5_OPCODE_SHL 9 +#define GEN5_OPCODE_RSR 10 +#define GEN5_OPCODE_RSL 11 +#define GEN5_OPCODE_ASR 12 +#define GEN5_OPCODE_CMP 16 +#define GEN5_OPCODE_JMPI 32 +#define GEN5_OPCODE_IF 34 +#define GEN5_OPCODE_IFF 35 +#define GEN5_OPCODE_ELSE 36 +#define GEN5_OPCODE_ENDIF 37 +#define 
GEN5_OPCODE_DO 38 +#define GEN5_OPCODE_WHILE 39 +#define GEN5_OPCODE_BREAK 40 +#define GEN5_OPCODE_CONTINUE 41 +#define GEN5_OPCODE_HALT 42 +#define GEN5_OPCODE_MSAVE 44 +#define GEN5_OPCODE_MRESTORE 45 +#define GEN5_OPCODE_PUSH 46 +#define GEN5_OPCODE_POP 47 +#define GEN5_OPCODE_WAIT 48 +#define GEN5_OPCODE_SEND 49 +#define GEN5_OPCODE_ADD 64 +#define GEN5_OPCODE_MUL 65 +#define GEN5_OPCODE_AVG 66 +#define GEN5_OPCODE_FRC 67 +#define GEN5_OPCODE_RNDU 68 +#define GEN5_OPCODE_RNDD 69 +#define GEN5_OPCODE_RNDE 70 +#define GEN5_OPCODE_RNDZ 71 +#define GEN5_OPCODE_MAC 72 +#define GEN5_OPCODE_MACH 73 +#define GEN5_OPCODE_LZD 74 +#define GEN5_OPCODE_SAD2 80 +#define GEN5_OPCODE_SADA2 81 +#define GEN5_OPCODE_DP4 84 +#define GEN5_OPCODE_DPH 85 +#define GEN5_OPCODE_DP3 86 +#define GEN5_OPCODE_DP2 87 +#define GEN5_OPCODE_DPA2 88 +#define GEN5_OPCODE_LINE 89 +#define GEN5_OPCODE_NOP 126 + +#define GEN5_PREDICATE_NONE 0 +#define GEN5_PREDICATE_NORMAL 1 +#define GEN5_PREDICATE_ALIGN1_ANYV 2 +#define GEN5_PREDICATE_ALIGN1_ALLV 3 +#define GEN5_PREDICATE_ALIGN1_ANY2H 4 +#define GEN5_PREDICATE_ALIGN1_ALL2H 5 +#define GEN5_PREDICATE_ALIGN1_ANY4H 6 +#define GEN5_PREDICATE_ALIGN1_ALL4H 7 +#define GEN5_PREDICATE_ALIGN1_ANY8H 8 +#define GEN5_PREDICATE_ALIGN1_ALL8H 9 +#define GEN5_PREDICATE_ALIGN1_ANY16H 10 +#define GEN5_PREDICATE_ALIGN1_ALL16H 11 +#define GEN5_PREDICATE_ALIGN16_REPLICATE_X 2 +#define GEN5_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define GEN5_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define GEN5_PREDICATE_ALIGN16_REPLICATE_W 5 +#define GEN5_PREDICATE_ALIGN16_ANY4H 6 +#define GEN5_PREDICATE_ALIGN16_ALL4H 7 + +#define GEN5_ARCHITECTURE_REGISTER_FILE 0 +#define GEN5_GENERAL_REGISTER_FILE 1 +#define GEN5_MESSAGE_REGISTER_FILE 2 +#define GEN5_IMMEDIATE_VALUE 3 + +#define GEN5_REGISTER_TYPE_UD 0 +#define GEN5_REGISTER_TYPE_D 1 +#define GEN5_REGISTER_TYPE_UW 2 +#define GEN5_REGISTER_TYPE_W 3 +#define GEN5_REGISTER_TYPE_UB 4 +#define GEN5_REGISTER_TYPE_B 5 +#define GEN5_REGISTER_TYPE_VF 5 /* 
packed float vector, immediates only? */ +#define GEN5_REGISTER_TYPE_HF 6 +#define GEN5_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define GEN5_REGISTER_TYPE_F 7 + +#define GEN5_ARF_NULL 0x00 +#define GEN5_ARF_ADDRESS 0x10 +#define GEN5_ARF_ACCUMULATOR 0x20 +#define GEN5_ARF_FLAG 0x30 +#define GEN5_ARF_MASK 0x40 +#define GEN5_ARF_MASK_STACK 0x50 +#define GEN5_ARF_MASK_STACK_DEPTH 0x60 +#define GEN5_ARF_STATE 0x70 +#define GEN5_ARF_CONTROL 0x80 +#define GEN5_ARF_NOTIFICATION_COUNT 0x90 +#define GEN5_ARF_IP 0xA0 + +#define GEN5_AMASK 0 +#define GEN5_IMASK 1 +#define GEN5_LMASK 2 +#define GEN5_CMASK 3 + + + +#define GEN5_THREAD_NORMAL 0 +#define GEN5_THREAD_ATOMIC 1 +#define GEN5_THREAD_SWITCH 2 + +#define GEN5_VERTICAL_STRIDE_0 0 +#define GEN5_VERTICAL_STRIDE_1 1 +#define GEN5_VERTICAL_STRIDE_2 2 +#define GEN5_VERTICAL_STRIDE_4 3 +#define GEN5_VERTICAL_STRIDE_8 4 +#define GEN5_VERTICAL_STRIDE_16 5 +#define GEN5_VERTICAL_STRIDE_32 6 +#define GEN5_VERTICAL_STRIDE_64 7 +#define GEN5_VERTICAL_STRIDE_128 8 +#define GEN5_VERTICAL_STRIDE_256 9 +#define GEN5_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define GEN5_WIDTH_1 0 +#define GEN5_WIDTH_2 1 +#define GEN5_WIDTH_4 2 +#define GEN5_WIDTH_8 3 +#define GEN5_WIDTH_16 4 + +#define GEN5_STATELESS_BUFFER_BOUNDARY_1K 0 +#define GEN5_STATELESS_BUFFER_BOUNDARY_2K 1 +#define GEN5_STATELESS_BUFFER_BOUNDARY_4K 2 +#define GEN5_STATELESS_BUFFER_BOUNDARY_8K 3 +#define GEN5_STATELESS_BUFFER_BOUNDARY_16K 4 +#define GEN5_STATELESS_BUFFER_BOUNDARY_32K 5 +#define GEN5_STATELESS_BUFFER_BOUNDARY_64K 6 +#define GEN5_STATELESS_BUFFER_BOUNDARY_128K 7 +#define GEN5_STATELESS_BUFFER_BOUNDARY_256K 8 +#define GEN5_STATELESS_BUFFER_BOUNDARY_512K 9 +#define GEN5_STATELESS_BUFFER_BOUNDARY_1M 10 +#define GEN5_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define GEN5_POLYGON_FACING_FRONT 0 +#define GEN5_POLYGON_FACING_BACK 1 + +#define GEN5_MESSAGE_TARGET_NULL 0 +#define GEN5_MESSAGE_TARGET_MATH 1 +#define GEN5_MESSAGE_TARGET_SAMPLER 2 
+#define GEN5_MESSAGE_TARGET_GATEWAY 3 +#define GEN5_MESSAGE_TARGET_DATAPORT_READ 4 +#define GEN5_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define GEN5_MESSAGE_TARGET_URB 6 +#define GEN5_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define GEN5_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define GEN5_SAMPLER_RETURN_FORMAT_UINT32 2 +#define GEN5_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define GEN5_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define GEN5_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define GEN5_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define GEN5_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define GEN5_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define GEN5_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define GEN5_SAMPLER_MESSAGE_SIMD8_RESINFO 2 +#define GEN5_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define GEN5_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define GEN5_SAMPLER_MESSAGE_SIMD8_LD 3 +#define GEN5_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define GEN5_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define GEN5_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define GEN5_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define GEN5_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define GEN5_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define GEN5_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define GEN5_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define GEN5_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define GEN5_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +#define GEN5_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +#define GEN5_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define GEN5_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define GEN5_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 + +#define GEN5_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define GEN5_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define GEN5_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define 
GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define GEN5_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define GEN5_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define GEN5_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define GEN5_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 +#define GEN5_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define GEN5_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define GEN5_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define GEN5_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +#define GEN5_MATH_FUNCTION_INV 1 +#define GEN5_MATH_FUNCTION_LOG 2 +#define GEN5_MATH_FUNCTION_EXP 3 +#define GEN5_MATH_FUNCTION_SQRT 4 +#define GEN5_MATH_FUNCTION_RSQ 5 +#define GEN5_MATH_FUNCTION_SIN 6 /* was 7 */ +#define GEN5_MATH_FUNCTION_COS 7 /* was 8 */ +#define GEN5_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define GEN5_MATH_FUNCTION_TAN 9 +#define GEN5_MATH_FUNCTION_POW 10 +#define GEN5_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define GEN5_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define GEN5_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define GEN5_MATH_INTEGER_UNSIGNED 0 +#define GEN5_MATH_INTEGER_SIGNED 1 + +#define GEN5_MATH_PRECISION_FULL 0 +#define GEN5_MATH_PRECISION_PARTIAL 1 + +#define GEN5_MATH_SATURATE_NONE 0 +#define GEN5_MATH_SATURATE_SATURATE 1 + +#define GEN5_MATH_DATA_VECTOR 0 +#define GEN5_MATH_DATA_SCALAR 1 + +#define GEN5_URB_OPCODE_WRITE 0 + +#define GEN5_URB_SWIZZLE_NONE 0 +#define GEN5_URB_SWIZZLE_INTERLEAVE 1 +#define GEN5_URB_SWIZZLE_TRANSPOSE 2 + +#define GEN5_SCRATCH_SPACE_SIZE_1K 0 +#define GEN5_SCRATCH_SPACE_SIZE_2K 1 +#define GEN5_SCRATCH_SPACE_SIZE_4K 2 +#define GEN5_SCRATCH_SPACE_SIZE_8K 3 +#define GEN5_SCRATCH_SPACE_SIZE_16K 4 +#define GEN5_SCRATCH_SPACE_SIZE_32K 5 
+#define GEN5_SCRATCH_SPACE_SIZE_64K 6 +#define GEN5_SCRATCH_SPACE_SIZE_128K 7 +#define GEN5_SCRATCH_SPACE_SIZE_256K 8 +#define GEN5_SCRATCH_SPACE_SIZE_512K 9 +#define GEN5_SCRATCH_SPACE_SIZE_1M 10 +#define GEN5_SCRATCH_SPACE_SIZE_2M 11 + + + + +#define CMD_URB_FENCE 0x6000 +#define CMD_CONST_BUFFER_STATE 0x6001 +#define CMD_CONST_BUFFER 0x6002 + +#define CMD_STATE_BASE_ADDRESS 0x6101 +#define CMD_STATE_INSN_POINTER 0x6102 +#define CMD_PIPELINE_SELECT 0x6104 + +#define CMD_PIPELINED_STATE_POINTERS 0x7800 +#define CMD_BINDING_TABLE_PTRS 0x7801 +#define CMD_VERTEX_BUFFER 0x7808 +#define CMD_VERTEX_ELEMENT 0x7809 +#define CMD_INDEX_BUFFER 0x780a +#define CMD_VF_STATISTICS 0x780b + +#define CMD_DRAW_RECT 0x7900 +#define CMD_BLEND_CONSTANT_COLOR 0x7901 +#define CMD_CHROMA_KEY 0x7904 +#define CMD_DEPTH_BUFFER 0x7905 +#define CMD_POLY_STIPPLE_OFFSET 0x7906 +#define CMD_POLY_STIPPLE_PATTERN 0x7907 +#define CMD_LINE_STIPPLE_PATTERN 0x7908 +#define CMD_GLOBAL_DEPTH_OFFSET_CLAMP 0x7908 + +#define CMD_PIPE_CONTROL 0x7a00 + +#define CMD_3D_PRIM 0x7b00 + +#define CMD_MI_FLUSH 0x0200 + + +/* Various values from the R0 vertex header: + */ +#define R02_PRIM_END 0x1 +#define R02_PRIM_START 0x2 + +/* media pipeline */ + +#define GEN5_VFE_MODE_GENERIC 0x0 +#define GEN5_VFE_MODE_VLD_MPEG2 0x1 +#define GEN5_VFE_MODE_IS 0x2 +#define GEN5_VFE_MODE_AVC_MC 0x4 +#define GEN5_VFE_MODE_AVC_IT 0x7 +#define GEN5_VFE_MODE_VC1_IT 0xB + +#define GEN5_VFE_DEBUG_COUNTER_FREE 0 +#define GEN5_VFE_DEBUG_COUNTER_FROZEN 1 +#define GEN5_VFE_DEBUG_COUNTER_ONCE 2 +#define GEN5_VFE_DEBUG_COUNTER_ALWAYS 3 + +/* VLD_STATE */ +#define GEN5_MPEG_TOP_FIELD 1 +#define GEN5_MPEG_BOTTOM_FIELD 2 +#define GEN5_MPEG_FRAME 3 +#define GEN5_MPEG_QSCALE_LINEAR 0 +#define GEN5_MPEG_QSCALE_NONLINEAR 1 +#define GEN5_MPEG_ZIGZAG_SCAN 0 +#define GEN5_MPEG_ALTER_VERTICAL_SCAN 1 +#define GEN5_MPEG_I_PICTURE 1 +#define GEN5_MPEG_P_PICTURE 2 +#define GEN5_MPEG_B_PICTURE 3 + +/* Command packets: + */ +struct header +{ + unsigned int 
length:16; + unsigned int opcode:16; +}; + + +union header_union +{ + struct header bits; + unsigned int dword; +}; + +struct gen5_3d_control +{ + struct + { + unsigned int length:8; + unsigned int notify_enable:1; + unsigned int pad:3; + unsigned int wc_flush_enable:1; + unsigned int depth_stall_enable:1; + unsigned int operation:2; + unsigned int opcode:16; + } header; + + struct + { + unsigned int pad:2; + unsigned int dest_addr_type:1; + unsigned int dest_addr:29; + } dest; + + unsigned int dword2; + unsigned int dword3; +}; + + +struct gen5_3d_primitive +{ + struct + { + unsigned int length:8; + unsigned int pad:2; + unsigned int topology:5; + unsigned int indexed:1; + unsigned int opcode:16; + } header; + + unsigned int verts_per_instance; + unsigned int start_vert_location; + unsigned int instance_count; + unsigned int start_instance_location; + unsigned int base_vert_location; +}; + +/* These seem to be passed around as function args, so it works out + * better to keep them as #defines: + */ +#define GEN5_FLUSH_READ_CACHE 0x1 +#define GEN5_FLUSH_STATE_CACHE 0x2 +#define GEN5_INHIBIT_FLUSH_RENDER_CACHE 0x4 +#define GEN5_FLUSH_SNAPSHOT_COUNTERS 0x8 + +struct gen5_mi_flush +{ + unsigned int flags:4; + unsigned int pad:12; + unsigned int opcode:16; +}; + +struct gen5_vf_statistics +{ + unsigned int statistics_enable:1; + unsigned int pad:15; + unsigned int opcode:16; +}; + + + +struct gen5_binding_table_pointers +{ + struct header header; + unsigned int vs; + unsigned int gs; + unsigned int clp; + unsigned int sf; + unsigned int wm; +}; + + +struct gen5_blend_constant_color +{ + struct header header; + float blend_constant_color[4]; +}; + + +struct gen5_depthbuffer +{ + union header_union header; + + union { + struct { + unsigned int pitch:18; + unsigned int format:3; + unsigned int pad:4; + unsigned int depth_offset_disable:1; + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int pad2:1; + unsigned int surface_type:3; + } bits; + unsigned 
int dword; + } dword1; + + unsigned int dword2_base_addr; + + union { + struct { + unsigned int pad:1; + unsigned int mipmap_layout:1; + unsigned int lod:4; + unsigned int width:13; + unsigned int height:13; + } bits; + unsigned int dword; + } dword3; + + union { + struct { + unsigned int pad:12; + unsigned int min_array_element:9; + unsigned int depth:11; + } bits; + unsigned int dword; + } dword4; +}; + +struct gen5_drawrect +{ + struct header header; + unsigned int xmin:16; + unsigned int ymin:16; + unsigned int xmax:16; + unsigned int ymax:16; + unsigned int xorg:16; + unsigned int yorg:16; +}; + + + + +struct gen5_global_depth_offset_clamp +{ + struct header header; + float depth_offset_clamp; +}; + +struct gen5_indexbuffer +{ + union { + struct + { + unsigned int length:8; + unsigned int index_format:2; + unsigned int cut_index_enable:1; + unsigned int pad:5; + unsigned int opcode:16; + } bits; + unsigned int dword; + + } header; + + unsigned int buffer_start; + unsigned int buffer_end; +}; + + +struct gen5_line_stipple +{ + struct header header; + + struct + { + unsigned int pattern:16; + unsigned int pad:16; + } bits0; + + struct + { + unsigned int repeat_count:9; + unsigned int pad:7; + unsigned int inverse_repeat_count:16; + } bits1; +}; + + +struct gen5_pipelined_state_pointers +{ + struct header header; + + struct { + unsigned int pad:5; + unsigned int offset:27; + } vs; + + struct + { + unsigned int enable:1; + unsigned int pad:4; + unsigned int offset:27; + } gs; + + struct + { + unsigned int enable:1; + unsigned int pad:4; + unsigned int offset:27; + } clp; + + struct + { + unsigned int pad:5; + unsigned int offset:27; + } sf; + + struct + { + unsigned int pad:5; + unsigned int offset:27; + } wm; + + struct + { + unsigned int pad:5; + unsigned int offset:27; /* KW: check me! 
*/ + } cc; +}; + + +struct gen5_polygon_stipple_offset +{ + struct header header; + + struct { + unsigned int y_offset:5; + unsigned int pad:3; + unsigned int x_offset:5; + unsigned int pad0:19; + } bits0; +}; + + + +struct gen5_polygon_stipple +{ + struct header header; + unsigned int stipple[32]; +}; + + + +struct gen5_pipeline_select +{ + struct + { + unsigned int pipeline_select:1; + unsigned int pad:15; + unsigned int opcode:16; + } header; +}; + + +struct gen5_pipe_control +{ + struct + { + unsigned int length:8; + unsigned int notify_enable:1; + unsigned int pad:2; + unsigned int instruction_state_cache_flush_enable:1; + unsigned int write_cache_flush_enable:1; + unsigned int depth_stall_enable:1; + unsigned int post_sync_operation:2; + + unsigned int opcode:16; + } header; + + struct + { + unsigned int pad:2; + unsigned int dest_addr_type:1; + unsigned int dest_addr:29; + } bits1; + + unsigned int data0; + unsigned int data1; +}; + + +struct gen5_urb_fence +{ + struct + { + unsigned int length:8; + unsigned int vs_realloc:1; + unsigned int gs_realloc:1; + unsigned int clp_realloc:1; + unsigned int sf_realloc:1; + unsigned int vfe_realloc:1; + unsigned int cs_realloc:1; + unsigned int pad:2; + unsigned int opcode:16; + } header; + + struct + { + unsigned int vs_fence:10; + unsigned int gs_fence:10; + unsigned int clp_fence:10; + unsigned int pad:2; + } bits0; + + struct + { + unsigned int sf_fence:10; + unsigned int vf_fence:10; + unsigned int cs_fence:10; + unsigned int pad:2; + } bits1; +}; + +struct gen5_constant_buffer_state /* previously gen5_command_streamer */ +{ + struct header header; + + struct + { + unsigned int nr_urb_entries:3; + unsigned int pad:1; + unsigned int urb_entry_size:5; + unsigned int pad0:23; + } bits0; +}; + +struct gen5_constant_buffer +{ + struct + { + unsigned int length:8; + unsigned int valid:1; + unsigned int pad:7; + unsigned int opcode:16; + } header; + + struct + { + unsigned int buffer_length:6; + unsigned int 
buffer_address:26; + } bits0; +}; + +struct gen5_state_base_address +{ + struct header header; + + struct + { + unsigned int modify_enable:1; + unsigned int pad:4; + unsigned int general_state_address:27; + } bits0; + + struct + { + unsigned int modify_enable:1; + unsigned int pad:4; + unsigned int surface_state_address:27; + } bits1; + + struct + { + unsigned int modify_enable:1; + unsigned int pad:4; + unsigned int indirect_object_state_address:27; + } bits2; + + struct + { + unsigned int modify_enable:1; + unsigned int pad:11; + unsigned int general_state_upper_bound:20; + } bits3; + + struct + { + unsigned int modify_enable:1; + unsigned int pad:11; + unsigned int indirect_object_state_upper_bound:20; + } bits4; +}; + +struct gen5_state_prefetch +{ + struct header header; + + struct + { + unsigned int prefetch_count:3; + unsigned int pad:3; + unsigned int prefetch_pointer:26; + } bits0; +}; + +struct gen5_system_instruction_pointer +{ + struct header header; + + struct + { + unsigned int pad:4; + unsigned int system_instruction_pointer:28; + } bits0; +}; + + + + +/* State structs for the various fixed function units: + */ + + +struct thread0 +{ + unsigned int pad0:1; + unsigned int grf_reg_count:3; + unsigned int pad1:2; + unsigned int kernel_start_pointer:26; +}; + +struct thread1 +{ + unsigned int ext_halt_exception_enable:1; + unsigned int sw_exception_enable:1; + unsigned int mask_stack_exception_enable:1; + unsigned int timeout_exception_enable:1; + unsigned int illegal_op_exception_enable:1; + unsigned int pad0:3; + unsigned int depth_coef_urb_read_offset:6; /* WM only */ + unsigned int pad1:2; + unsigned int floating_point_mode:1; + unsigned int thread_priority:1; + unsigned int binding_table_entry_count:8; + unsigned int pad3:5; + unsigned int single_program_flow:1; +}; + +struct thread2 +{ + unsigned int per_thread_scratch_space:4; + unsigned int pad0:6; + unsigned int scratch_space_base_pointer:22; +}; + + +struct thread3 +{ + unsigned int 
dispatch_grf_start_reg:4; + unsigned int urb_entry_read_offset:6; + unsigned int pad0:1; + unsigned int urb_entry_read_length:6; + unsigned int pad1:1; + unsigned int const_urb_entry_read_offset:6; + unsigned int pad2:1; + unsigned int const_urb_entry_read_length:6; + unsigned int pad3:1; +}; + + + +struct gen5_clip_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned int pad0:9; + unsigned int gs_output_stats:1; /* not always */ + unsigned int stats_enable:1; + unsigned int nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:6; /* may be less */ + unsigned int pad3:1; + } thread4; + + struct + { + unsigned int pad0:13; + unsigned int clip_mode:3; + unsigned int userclip_enable_flags:8; + unsigned int userclip_must_clip:1; + unsigned int pad1:1; + unsigned int guard_band_enable:1; + unsigned int viewport_z_clip_enable:1; + unsigned int viewport_xy_clip_enable:1; + unsigned int vertex_position_space:1; + unsigned int api_mode:1; + unsigned int pad2:1; + } clip5; + + struct + { + unsigned int pad0:5; + unsigned int clipper_viewport_state_ptr:27; + } clip6; + + + float viewport_xmin; + float viewport_xmax; + float viewport_ymin; + float viewport_ymax; +}; + + + +struct gen5_cc_unit_state +{ + struct + { + unsigned int pad0:3; + unsigned int bf_stencil_pass_depth_pass_op:3; + unsigned int bf_stencil_pass_depth_fail_op:3; + unsigned int bf_stencil_fail_op:3; + unsigned int bf_stencil_func:3; + unsigned int bf_stencil_enable:1; + unsigned int pad1:2; + unsigned int stencil_write_enable:1; + unsigned int stencil_pass_depth_pass_op:3; + unsigned int stencil_pass_depth_fail_op:3; + unsigned int stencil_fail_op:3; + unsigned int stencil_func:3; + unsigned int stencil_enable:1; + } cc0; + + + struct + { + unsigned int bf_stencil_ref:8; + unsigned int stencil_write_mask:8; + unsigned int 
stencil_test_mask:8; + unsigned int stencil_ref:8; + } cc1; + + + struct + { + unsigned int logicop_enable:1; + unsigned int pad0:10; + unsigned int depth_write_enable:1; + unsigned int depth_test_function:3; + unsigned int depth_test:1; + unsigned int bf_stencil_write_mask:8; + unsigned int bf_stencil_test_mask:8; + } cc2; + + + struct + { + unsigned int pad0:8; + unsigned int alpha_test_func:3; + unsigned int alpha_test:1; + unsigned int blend_enable:1; + unsigned int ia_blend_enable:1; + unsigned int pad1:1; + unsigned int alpha_test_format:1; + unsigned int pad2:16; + } cc3; + + struct + { + unsigned int pad0:5; + unsigned int cc_viewport_state_offset:27; + } cc4; + + struct + { + unsigned int pad0:2; + unsigned int ia_dest_blend_factor:5; + unsigned int ia_src_blend_factor:5; + unsigned int ia_blend_function:3; + unsigned int statistics_enable:1; + unsigned int logicop_func:4; + unsigned int pad1:11; + unsigned int dither_enable:1; + } cc5; + + struct + { + unsigned int clamp_post_alpha_blend:1; + unsigned int clamp_pre_alpha_blend:1; + unsigned int clamp_range:2; + unsigned int pad0:11; + unsigned int y_dither_offset:2; + unsigned int x_dither_offset:2; + unsigned int dest_blend_factor:5; + unsigned int src_blend_factor:5; + unsigned int blend_function:3; + } cc6; + + struct { + union { + float f; + unsigned char ub[4]; + } alpha_ref; + } cc7; +}; + + + +struct gen5_sf_unit_state +{ + struct thread0 thread0; + struct { + unsigned int pad0:7; + unsigned int sw_exception_enable:1; + unsigned int pad1:3; + unsigned int mask_stack_exception_enable:1; + unsigned int pad2:1; + unsigned int illegal_op_exception_enable:1; + unsigned int pad3:2; + unsigned int floating_point_mode:1; + unsigned int thread_priority:1; + unsigned int binding_table_entry_count:8; + unsigned int pad4:5; + unsigned int single_program_flow:1; + } sf1; + + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned int pad0:10; + unsigned int stats_enable:1; + unsigned int 
nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:6; + unsigned int pad3:1; + } thread4; + + struct + { + unsigned int front_winding:1; + unsigned int viewport_transform:1; + unsigned int pad0:3; + unsigned int sf_viewport_state_offset:27; + } sf5; + + struct + { + unsigned int pad0:9; + unsigned int dest_org_vbias:4; + unsigned int dest_org_hbias:4; + unsigned int scissor:1; + unsigned int disable_2x2_trifilter:1; + unsigned int disable_zero_pix_trifilter:1; + unsigned int point_rast_rule:2; + unsigned int line_endcap_aa_region_width:2; + unsigned int line_width:4; + unsigned int fast_scissor_disable:1; + unsigned int cull_mode:2; + unsigned int aa_enable:1; + } sf6; + + struct + { + unsigned int point_size:11; + unsigned int use_point_size_state:1; + unsigned int subpixel_precision:1; + unsigned int sprite_point:1; + unsigned int pad0:11; + unsigned int trifan_pv:2; + unsigned int linestrip_pv:2; + unsigned int tristrip_pv:2; + unsigned int line_last_pixel_enable:1; + } sf7; + +}; + + +struct gen5_gs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned int pad0:10; + unsigned int stats_enable:1; + unsigned int nr_urb_entries:7; + unsigned int pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:1; + unsigned int pad3:6; + } thread4; + + struct + { + unsigned int sampler_count:3; + unsigned int pad0:2; + unsigned int sampler_state_pointer:27; + } gs5; + + + struct + { + unsigned int max_vp_index:4; + unsigned int pad0:26; + unsigned int reorder_enable:1; + unsigned int pad1:1; + } gs6; +}; + + +struct gen5_vs_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct + { + unsigned int pad0:10; + unsigned int stats_enable:1; + unsigned int nr_urb_entries:7; + unsigned int 
pad1:1; + unsigned int urb_entry_allocation_size:5; + unsigned int pad2:1; + unsigned int max_threads:4; + unsigned int pad3:3; + } thread4; + + struct + { + unsigned int sampler_count:3; + unsigned int pad0:2; + unsigned int sampler_state_pointer:27; + } vs5; + + struct + { + unsigned int vs_enable:1; + unsigned int vert_cache_disable:1; + unsigned int pad0:30; + } vs6; +}; + + +struct gen5_wm_unit_state +{ + struct thread0 thread0; + struct thread1 thread1; + struct thread2 thread2; + struct thread3 thread3; + + struct { + unsigned int stats_enable:1; + unsigned int pad0:1; + unsigned int sampler_count:3; + unsigned int sampler_state_pointer:27; + } wm4; + + struct + { + unsigned int enable_8_pix:1; + unsigned int enable_16_pix:1; + unsigned int enable_32_pix:1; + unsigned int pad0:7; + unsigned int legacy_global_depth_bias:1; + unsigned int line_stipple:1; + unsigned int depth_offset:1; + unsigned int polygon_stipple:1; + unsigned int line_aa_region_width:2; + unsigned int line_endcap_aa_region_width:2; + unsigned int early_depth_test:1; + unsigned int thread_dispatch_enable:1; + unsigned int program_uses_depth:1; + unsigned int program_computes_depth:1; + unsigned int program_uses_killpixel:1; + unsigned int legacy_line_rast: 1; + unsigned int transposed_urb_read:1; + unsigned int max_threads:7; + } wm5; + + float global_depth_offset_constant; + float global_depth_offset_scale; + + struct { + unsigned int pad0:1; + unsigned int grf_reg_count_1:3; + unsigned int pad1:2; + unsigned int kernel_start_pointer_1:26; + } wm8; + + struct { + unsigned int pad0:1; + unsigned int grf_reg_count_2:3; + unsigned int pad1:2; + unsigned int kernel_start_pointer_2:26; + } wm9; + + struct { + unsigned int pad0:1; + unsigned int grf_reg_count_3:3; + unsigned int pad1:2; + unsigned int kernel_start_pointer_3:26; + } wm10; +}; + +struct gen5_wm_unit_state_padded { + struct gen5_wm_unit_state state; + char pad[64 - sizeof(struct gen5_wm_unit_state)]; +}; + +/* The hardware supports 
two different modes for border color. The + * default (OpenGL) mode uses floating-point color channels, while the + * legacy mode uses 4 bytes. + * + * More significantly, the legacy mode respects the components of the + * border color for channels not present in the source, (whereas the + * default mode will ignore the border color's alpha channel and use + * alpha==1 for an RGB source, for example). + * + * The legacy mode matches the semantics specified by the Render + * extension. + */ +struct gen5_sampler_default_border_color { + float color[4]; +}; + +struct gen5_sampler_legacy_border_color { + uint8_t color[4]; +}; + +struct gen5_sampler_state +{ + + struct + { + unsigned int shadow_function:3; + unsigned int lod_bias:11; + unsigned int min_filter:3; + unsigned int mag_filter:3; + unsigned int mip_filter:2; + unsigned int base_level:5; + unsigned int pad:1; + unsigned int lod_preclamp:1; + unsigned int border_color_mode:1; + unsigned int pad0:1; + unsigned int disable:1; + } ss0; + + struct + { + unsigned int r_wrap_mode:3; + unsigned int t_wrap_mode:3; + unsigned int s_wrap_mode:3; + unsigned int pad:3; + unsigned int max_lod:10; + unsigned int min_lod:10; + } ss1; + + + struct + { + unsigned int pad:5; + unsigned int border_color_pointer:27; + } ss2; + + struct + { + unsigned int pad:19; + unsigned int max_aniso:3; + unsigned int chroma_key_mode:1; + unsigned int chroma_key_index:2; + unsigned int chroma_key_enable:1; + unsigned int monochrome_filter_width:3; + unsigned int monochrome_filter_height:3; + } ss3; +}; + + +struct gen5_clipper_viewport +{ + float xmin; + float xmax; + float ymin; + float ymax; +}; + +struct gen5_cc_viewport +{ + float min_depth; + float max_depth; +}; + +struct gen5_sf_viewport +{ + struct { + float m00; + float m11; + float m22; + float m30; + float m31; + float m32; + } viewport; + + struct { + short xmin; + short ymin; + short xmax; + short ymax; + } scissor; +}; + +/* Documented in the subsystem/shared-functions/sampler 
chapter... + */ +struct gen5_surface_state +{ + struct { + unsigned int cube_pos_z:1; + unsigned int cube_neg_z:1; + unsigned int cube_pos_y:1; + unsigned int cube_neg_y:1; + unsigned int cube_pos_x:1; + unsigned int cube_neg_x:1; + unsigned int pad:3; + unsigned int render_cache_read_mode:1; + unsigned int mipmap_layout_mode:1; + unsigned int vert_line_stride_ofs:1; + unsigned int vert_line_stride:1; + unsigned int color_blend:1; + unsigned int writedisable_blue:1; + unsigned int writedisable_green:1; + unsigned int writedisable_red:1; + unsigned int writedisable_alpha:1; + unsigned int surface_format:9; + unsigned int data_return_format:1; + unsigned int pad0:1; + unsigned int surface_type:3; + } ss0; + + struct { + unsigned int base_addr; + } ss1; + + struct { + unsigned int render_target_rotation:2; + unsigned int mip_count:4; + unsigned int width:13; + unsigned int height:13; + } ss2; + + struct { + unsigned int tile_walk:1; + unsigned int tiled_surface:1; + unsigned int pad:1; + unsigned int pitch:18; + unsigned int depth:11; + } ss3; + + struct { + unsigned int pad:19; + unsigned int min_array_elt:9; + unsigned int min_lod:4; + } ss4; + + struct { + unsigned int pad:20; + unsigned int y_offset:4; + unsigned int pad2:1; + unsigned int x_offset:7; + } ss5; +}; + + + +struct gen5_vertex_buffer_state +{ + struct { + unsigned int pitch:11; + unsigned int pad:15; + unsigned int access_type:1; + unsigned int vb_index:5; + } vb0; + + unsigned int start_addr; + unsigned int max_index; +#if 1 + unsigned int instance_data_step_rate; /* not included for sequential/random vertices? 
*/ +#endif +}; + +#define GEN5_VBP_MAX 17 + +struct gen5_vb_array_state { + struct header header; + struct gen5_vertex_buffer_state vb[GEN5_VBP_MAX]; +}; + + +struct gen5_vertex_element_state +{ + struct + { + unsigned int src_offset:11; + unsigned int pad:5; + unsigned int src_format:9; + unsigned int pad0:1; + unsigned int valid:1; + unsigned int vertex_buffer_index:5; + } ve0; + + struct + { + unsigned int dst_offset:8; + unsigned int pad:8; + unsigned int vfcomponent3:4; + unsigned int vfcomponent2:4; + unsigned int vfcomponent1:4; + unsigned int vfcomponent0:4; + } ve1; +}; + +#define GEN5_VEP_MAX 18 + +struct gen5_vertex_element_packet { + struct header header; + struct gen5_vertex_element_state ve[GEN5_VEP_MAX]; /* note: less than _TNL_ATTRIB_MAX */ +}; + + +struct gen5_urb_immediate { + unsigned int opcode:4; + unsigned int offset:6; + unsigned int swizzle_control:2; + unsigned int pad:1; + unsigned int allocate:1; + unsigned int used:1; + unsigned int complete:1; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; +}; + +/* Instruction format for the execution units: + */ + +struct gen5_instruction +{ + struct + { + unsigned int opcode:7; + unsigned int pad:1; + unsigned int access_mode:1; + unsigned int mask_control:1; + unsigned int dependency_control:2; + unsigned int compression_control:2; + unsigned int thread_control:2; + unsigned int predicate_control:4; + unsigned int predicate_inverse:1; + unsigned int execution_size:3; + unsigned int destreg__conditonalmod:4; /* destreg - send, conditionalmod - others */ + unsigned int pad0:2; + unsigned int debug_control:1; + unsigned int saturate:1; + } header; + + union { + struct + { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int src1_reg_file:2; + unsigned int src1_reg_type:3; + unsigned int pad:1; + unsigned int 
dest_subreg_nr:5; + unsigned int dest_reg_nr:8; + unsigned int dest_horiz_stride:2; + unsigned int dest_address_mode:1; + } da1; + + struct + { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int pad:6; + int dest_indirect_offset:10; /* offset against the deref'd address reg */ + unsigned int dest_subreg_nr:3; /* subnr for the address reg a0.x */ + unsigned int dest_horiz_stride:2; + unsigned int dest_address_mode:1; + } ia1; + + struct + { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int src1_reg_file:2; + unsigned int src1_reg_type:3; + unsigned int pad0:1; + unsigned int dest_writemask:4; + unsigned int dest_subreg_nr:1; + unsigned int dest_reg_nr:8; + unsigned int pad1:2; + unsigned int dest_address_mode:1; + } da16; + + struct + { + unsigned int dest_reg_file:2; + unsigned int dest_reg_type:3; + unsigned int src0_reg_file:2; + unsigned int src0_reg_type:3; + unsigned int pad0:6; + unsigned int dest_writemask:4; + int dest_indirect_offset:6; + unsigned int dest_subreg_nr:3; + unsigned int pad1:2; + unsigned int dest_address_mode:1; + } ia16; + } bits1; + + + union { + struct + { + unsigned int src0_subreg_nr:5; + unsigned int src0_reg_nr:8; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_horiz_stride:2; + unsigned int src0_width:3; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad:6; + } da1; + + struct + { + int src0_indirect_offset:10; + unsigned int src0_subreg_nr:3; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_horiz_stride:2; + unsigned int src0_width:3; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad:6; + } ia1; + + struct + { + unsigned int src0_swz_x:2; + unsigned int 
src0_swz_y:2; + unsigned int src0_subreg_nr:1; + unsigned int src0_reg_nr:8; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_swz_z:2; + unsigned int src0_swz_w:2; + unsigned int pad0:1; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad1:6; + } da16; + + struct + { + unsigned int src0_swz_x:2; + unsigned int src0_swz_y:2; + int src0_indirect_offset:6; + unsigned int src0_subreg_nr:3; + unsigned int src0_abs:1; + unsigned int src0_negate:1; + unsigned int src0_address_mode:1; + unsigned int src0_swz_z:2; + unsigned int src0_swz_w:2; + unsigned int pad0:1; + unsigned int src0_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad1:6; + } ia16; + + } bits2; + + union + { + struct + { + unsigned int src1_subreg_nr:5; + unsigned int src1_reg_nr:8; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad:1; + unsigned int src1_horiz_stride:2; + unsigned int src1_width:3; + unsigned int src1_vert_stride:4; + unsigned int pad0:7; + } da1; + + struct + { + unsigned int src1_swz_x:2; + unsigned int src1_swz_y:2; + unsigned int src1_subreg_nr:1; + unsigned int src1_reg_nr:8; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad0:1; + unsigned int src1_swz_z:2; + unsigned int src1_swz_w:2; + unsigned int pad1:1; + unsigned int src1_vert_stride:4; + unsigned int pad2:7; + } da16; + + struct + { + int src1_indirect_offset:10; + unsigned int src1_subreg_nr:3; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad0:1; + unsigned int src1_horiz_stride:2; + unsigned int src1_width:3; + unsigned int src1_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad1:6; + } ia1; + + struct + { + unsigned int src1_swz_x:2; + unsigned int src1_swz_y:2; + int src1_indirect_offset:6; + unsigned int src1_subreg_nr:3; + unsigned int src1_abs:1; + unsigned int src1_negate:1; + unsigned int pad0:1; + unsigned int 
src1_swz_z:2; + unsigned int src1_swz_w:2; + unsigned int pad1:1; + unsigned int src1_vert_stride:4; + unsigned int flag_reg_nr:1; + unsigned int pad2:6; + } ia16; + + + struct + { + int jump_count:16; /* note: signed */ + unsigned int pop_count:4; + unsigned int pad0:12; + } if_else; + + struct { + unsigned int function:4; + unsigned int int_type:1; + unsigned int precision:1; + unsigned int saturate:1; + unsigned int data_type:1; + unsigned int pad0:8; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } math; + + struct { + unsigned int binding_table_index:8; + unsigned int sampler:4; + unsigned int return_format:2; + unsigned int msg_type:2; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } sampler; + + struct gen5_urb_immediate urb; + + struct { + unsigned int binding_table_index:8; + unsigned int msg_control:4; + unsigned int msg_type:2; + unsigned int target_cache:2; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } dp_read; + + struct { + unsigned int binding_table_index:8; + unsigned int msg_control:3; + unsigned int pixel_scoreboard_clear:1; + unsigned int msg_type:3; + unsigned int send_commit_msg:1; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } dp_write; + + struct { + unsigned int pad:16; + unsigned int response_length:4; + unsigned int msg_length:4; + unsigned int msg_target:4; + unsigned int pad1:3; + unsigned int end_of_thread:1; + } generic; + + unsigned int ud; + } bits3; +}; + +/* media pipeline */ + +struct gen5_vfe_state { + struct { + unsigned int per_thread_scratch_space:4; + unsigned int pad3:3; + unsigned int extend_vfe_state_present:1; + 
unsigned int pad2:2; + unsigned int scratch_base:22; + } vfe0; + + struct { + unsigned int debug_counter_control:2; + unsigned int children_present:1; + unsigned int vfe_mode:4; + unsigned int pad2:2; + unsigned int num_urb_entries:7; + unsigned int urb_entry_alloc_size:9; + unsigned int max_threads:7; + } vfe1; + + struct { + unsigned int pad4:4; + unsigned int interface_descriptor_base:28; + } vfe2; +}; + +struct gen5_vld_state { + struct { + unsigned int pad6:6; + unsigned int scan_order:1; + unsigned int intra_vlc_format:1; + unsigned int quantizer_scale_type:1; + unsigned int concealment_motion_vector:1; + unsigned int frame_predict_frame_dct:1; + unsigned int top_field_first:1; + unsigned int picture_structure:2; + unsigned int intra_dc_precision:2; + unsigned int f_code_0_0:4; + unsigned int f_code_0_1:4; + unsigned int f_code_1_0:4; + unsigned int f_code_1_1:4; + } vld0; + + struct { + unsigned int pad2:9; + unsigned int picture_coding_type:2; + unsigned int pad:21; + } vld1; + + struct { + unsigned int index_0:4; + unsigned int index_1:4; + unsigned int index_2:4; + unsigned int index_3:4; + unsigned int index_4:4; + unsigned int index_5:4; + unsigned int index_6:4; + unsigned int index_7:4; + } desc_remap_table0; + + struct { + unsigned int index_8:4; + unsigned int index_9:4; + unsigned int index_10:4; + unsigned int index_11:4; + unsigned int index_12:4; + unsigned int index_13:4; + unsigned int index_14:4; + unsigned int index_15:4; + } desc_remap_table1; +}; + +struct gen5_interface_descriptor { + struct { + unsigned int grf_reg_blocks:4; + unsigned int pad:2; + unsigned int kernel_start_pointer:26; + } desc0; + + struct { + unsigned int pad:7; + unsigned int software_exception:1; + unsigned int pad2:3; + unsigned int maskstack_exception:1; + unsigned int pad3:1; + unsigned int illegal_opcode_exception:1; + unsigned int pad4:2; + unsigned int floating_point_mode:1; + unsigned int thread_priority:1; + unsigned int single_program_flow:1; + unsigned int 
pad5:1; + unsigned int const_urb_entry_read_offset:6; + unsigned int const_urb_entry_read_len:6; + } desc1; + + struct { + unsigned int pad:2; + unsigned int sampler_count:3; + unsigned int sampler_state_pointer:27; + } desc2; + + struct { + unsigned int binding_table_entry_count:5; + unsigned int binding_table_pointer:27; + } desc3; +}; + +struct gen6_blend_state +{ + struct { + unsigned int dest_blend_factor:5; + unsigned int source_blend_factor:5; + unsigned int pad3:1; + unsigned int blend_func:3; + unsigned int pad2:1; + unsigned int ia_dest_blend_factor:5; + unsigned int ia_source_blend_factor:5; + unsigned int pad1:1; + unsigned int ia_blend_func:3; + unsigned int pad0:1; + unsigned int ia_blend_enable:1; + unsigned int blend_enable:1; + } blend0; + + struct { + unsigned int post_blend_clamp_enable:1; + unsigned int pre_blend_clamp_enable:1; + unsigned int clamp_range:2; + unsigned int pad0:4; + unsigned int x_dither_offset:2; + unsigned int y_dither_offset:2; + unsigned int dither_enable:1; + unsigned int alpha_test_func:3; + unsigned int alpha_test_enable:1; + unsigned int pad1:1; + unsigned int logic_op_func:4; + unsigned int logic_op_enable:1; + unsigned int pad2:1; + unsigned int write_disable_b:1; + unsigned int write_disable_g:1; + unsigned int write_disable_r:1; + unsigned int write_disable_a:1; + unsigned int pad3:1; + unsigned int alpha_to_coverage_dither:1; + unsigned int alpha_to_one:1; + unsigned int alpha_to_coverage:1; + } blend1; +}; + +struct gen6_color_calc_state +{ + struct { + unsigned int alpha_test_format:1; + unsigned int pad0:14; + unsigned int round_disable:1; + unsigned int bf_stencil_ref:8; + unsigned int stencil_ref:8; + } cc0; + + union { + float alpha_ref_f; + struct { + unsigned int ui:8; + unsigned int pad0:24; + } alpha_ref_fi; + } cc1; + + float constant_r; + float constant_g; + float constant_b; + float constant_a; +}; + +struct gen6_depth_stencil_state +{ + struct { + unsigned int pad0:3; + unsigned int 
bf_stencil_pass_depth_pass_op:3; + unsigned int bf_stencil_pass_depth_fail_op:3; + unsigned int bf_stencil_fail_op:3; + unsigned int bf_stencil_func:3; + unsigned int bf_stencil_enable:1; + unsigned int pad1:2; + unsigned int stencil_write_enable:1; + unsigned int stencil_pass_depth_pass_op:3; + unsigned int stencil_pass_depth_fail_op:3; + unsigned int stencil_fail_op:3; + unsigned int stencil_func:3; + unsigned int stencil_enable:1; + } ds0; + + struct { + unsigned int bf_stencil_write_mask:8; + unsigned int bf_stencil_test_mask:8; + unsigned int stencil_write_mask:8; + unsigned int stencil_test_mask:8; + } ds1; + + struct { + unsigned int pad0:26; + unsigned int depth_write_enable:1; + unsigned int depth_test_func:3; + unsigned int pad1:1; + unsigned int depth_test_enable:1; + } ds2; +}; + +typedef enum { + SAMPLER_FILTER_NEAREST = 0, + SAMPLER_FILTER_BILINEAR, + FILTER_COUNT +} sampler_filter_t; + +typedef enum { + SAMPLER_EXTEND_NONE = 0, + SAMPLER_EXTEND_REPEAT, + SAMPLER_EXTEND_PAD, + SAMPLER_EXTEND_REFLECT, + EXTEND_COUNT +} sampler_extend_t; + +typedef enum { + WM_KERNEL = 0, + WM_KERNEL_PROJECTIVE, + + WM_KERNEL_MASK, + WM_KERNEL_MASK_PROJECTIVE, + + WM_KERNEL_MASKCA, + WM_KERNEL_MASKCA_PROJECTIVE, + + WM_KERNEL_MASKCA_SRCALPHA, + WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + + WM_KERNEL_VIDEO_PLANAR, + WM_KERNEL_VIDEO_PACKED, + KERNEL_COUNT +} wm_kernel_t; +#endif diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c new file mode 100644 index 00000000..5d8eb77b --- /dev/null +++ b/src/sna/gen6_render.c @@ -0,0 +1,2860 @@ +/* + * Copyright © 2006,2008,2011 Intel Corporation + * Copyright © 2007 Red Hat, Inc. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Wang Zhenyu <zhenyu.z.wang@sna.com> + * Eric Anholt <eric@anholt.net> + * Carl Worth <cworth@redhat.com> + * Keith Packard <keithp@keithp.com> + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <xf86.h> + +#include "sna.h" +#include "sna_reg.h" +#include "sna_render.h" +#include "sna_render_inline.h" +#include "sna_video.h" + +#include "gen6_render.h" + +#if DEBUG_RENDER +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +#define ALWAYS_EMIT_DRAWRECT 1 + +#define NO_COMPOSITE 0 +#define NO_COPY 0 +#define NO_COPY_BOXES 0 +#define NO_FILL 0 +#define NO_FILL_BOXES 0 + +static const uint32_t ps_kernel_nomask_affine[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_nomask_projective[][4] = { +#include "exa_wm_src_projective.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_maskca_affine[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_mask_affine.g6b" +#include "exa_wm_mask_sample_argb.g6b" +#include "exa_wm_ca.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_maskca_projective[][4] = { +#include "exa_wm_src_projective.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_mask_projective.g6b" +#include "exa_wm_mask_sample_argb.g6b" +#include "exa_wm_ca.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_affine[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_a.g6b" +#include "exa_wm_mask_affine.g6b" +#include "exa_wm_mask_sample_argb.g6b" +#include "exa_wm_ca_srcalpha.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_maskca_srcalpha_projective[][4] = { +#include "exa_wm_src_projective.g6b" +#include "exa_wm_src_sample_a.g6b" +#include 
"exa_wm_mask_projective.g6b" +#include "exa_wm_mask_sample_argb.g6b" +#include "exa_wm_ca_srcalpha.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_masknoca_affine[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_mask_affine.g6b" +#include "exa_wm_mask_sample_a.g6b" +#include "exa_wm_noca.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_masknoca_projective[][4] = { +#include "exa_wm_src_projective.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_mask_projective.g6b" +#include "exa_wm_mask_sample_a.g6b" +#include "exa_wm_noca.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_packed[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_argb.g6b" +#include "exa_wm_yuv_rgb.g6b" +#include "exa_wm_write.g6b" +}; + +static const uint32_t ps_kernel_planar[][4] = { +#include "exa_wm_src_affine.g6b" +#include "exa_wm_src_sample_planar.g6b" +#include "exa_wm_yuv_rgb.g6b" +#include "exa_wm_write.g6b" +}; + +#define KERNEL(kernel_enum, kernel, masked) \ + [GEN6_WM_KERNEL_##kernel_enum] = {#kernel_enum, kernel, sizeof(kernel), masked} +static const struct wm_kernel_info { + const char *name; + const void *data; + unsigned int size; + Bool has_mask; +} wm_kernels[] = { + KERNEL(NOMASK, ps_kernel_nomask_affine, FALSE), + KERNEL(NOMASK_PROJECTIVE, ps_kernel_nomask_projective, FALSE), + + KERNEL(MASK, ps_kernel_masknoca_affine, TRUE), + KERNEL(MASK_PROJECTIVE, ps_kernel_masknoca_projective, TRUE), + + KERNEL(MASKCA, ps_kernel_maskca_affine, TRUE), + KERNEL(MASKCA_PROJECTIVE, ps_kernel_maskca_projective, TRUE), + + KERNEL(MASKCA_SRCALPHA, ps_kernel_maskca_srcalpha_affine, TRUE), + KERNEL(MASKCA_SRCALPHA_PROJECTIVE, ps_kernel_maskca_srcalpha_projective, TRUE), + + KERNEL(VIDEO_PLANAR, ps_kernel_planar, FALSE), + KERNEL(VIDEO_PACKED, ps_kernel_packed, FALSE), +}; +#undef KERNEL + +static const struct blendinfo { + Bool src_alpha; + 
uint32_t src_blend; + uint32_t dst_blend; +} gen6_blend_op[] = { + /* Clear */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ZERO}, + /* Src */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ZERO}, + /* Dst */ {0, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_ONE}, + /* Over */ {1, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, + /* OverReverse */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ONE}, + /* In */ {0, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_ZERO}, + /* InReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_SRC_ALPHA}, + /* Out */ {0, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_ZERO}, + /* OutReverse */ {1, GEN6_BLENDFACTOR_ZERO, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, + /* Atop */ {1, GEN6_BLENDFACTOR_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, + /* AtopReverse */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_SRC_ALPHA}, + /* Xor */ {1, GEN6_BLENDFACTOR_INV_DST_ALPHA, GEN6_BLENDFACTOR_INV_SRC_ALPHA}, + /* Add */ {0, GEN6_BLENDFACTOR_ONE, GEN6_BLENDFACTOR_ONE}, +}; + +/** + * Highest-valued BLENDFACTOR used in gen6_blend_op. 
+ * + * This leaves out GEN6_BLENDFACTOR_INV_DST_COLOR, + * GEN6_BLENDFACTOR_INV_CONST_{COLOR,ALPHA}, + * GEN6_BLENDFACTOR_INV_SRC1_{COLOR,ALPHA} + */ +#define GEN6_BLENDFACTOR_COUNT (GEN6_BLENDFACTOR_INV_DST_ALPHA + 1) + +/* FIXME: surface format defined in gen6_defines.h, shared Sampling engine + * 1.7.2 + */ +static const struct formatinfo { + CARD32 pict_fmt; + uint32_t card_fmt; +} gen6_tex_formats[] = { + {PICT_a8, GEN6_SURFACEFORMAT_A8_UNORM}, + {PICT_a8r8g8b8, GEN6_SURFACEFORMAT_B8G8R8A8_UNORM}, + {PICT_x8r8g8b8, GEN6_SURFACEFORMAT_B8G8R8X8_UNORM}, + {PICT_a8b8g8r8, GEN6_SURFACEFORMAT_R8G8B8A8_UNORM}, + {PICT_x8b8g8r8, GEN6_SURFACEFORMAT_R8G8B8X8_UNORM}, + {PICT_r8g8b8, GEN6_SURFACEFORMAT_R8G8B8_UNORM}, + {PICT_r5g6b5, GEN6_SURFACEFORMAT_B5G6R5_UNORM}, + {PICT_a1r5g5b5, GEN6_SURFACEFORMAT_B5G5R5A1_UNORM}, + {PICT_a2r10g10b10, GEN6_SURFACEFORMAT_B10G10R10A2_UNORM}, + {PICT_x2r10g10b10, GEN6_SURFACEFORMAT_B10G10R10X2_UNORM}, + {PICT_a2b10g10r10, GEN6_SURFACEFORMAT_R10G10B10A2_UNORM}, + {PICT_x2r10g10b10, GEN6_SURFACEFORMAT_B10G10R10X2_UNORM}, + {PICT_a4r4g4b4, GEN6_SURFACEFORMAT_B4G4R4A4_UNORM}, +}; + +#define GEN6_BLEND_STATE_PADDED_SIZE ALIGN(sizeof(struct gen6_blend_state), 64) + +#define BLEND_OFFSET(s, d) \ + (((s) * GEN6_BLENDFACTOR_COUNT + (d)) * GEN6_BLEND_STATE_PADDED_SIZE) + +#define SAMPLER_OFFSET(sf, se, mf, me) \ + (((((sf) * EXTEND_COUNT + (se)) * FILTER_COUNT + (mf)) * EXTEND_COUNT + (me)) * 2 * sizeof(struct gen6_sampler_state)) + +#define OUT_BATCH(v) batch_emit(sna, v) +#define OUT_VERTEX(x,y) vertex_emit_2s(sna, x,y) +#define OUT_VERTEX_F(v) vertex_emit(sna, v) + +static uint32_t gen6_get_blend(int op, + Bool has_component_alpha, + uint32_t dst_format) +{ + uint32_t src, dst; + + src = gen6_blend_op[op].src_blend; + dst = gen6_blend_op[op].dst_blend; + + /* If there's no dst alpha channel, adjust the blend op so that + * we'll treat it always as 1. 
+ */ + if (PICT_FORMAT_A(dst_format) == 0) { + if (src == GEN6_BLENDFACTOR_DST_ALPHA) + src = GEN6_BLENDFACTOR_ONE; + else if (src == GEN6_BLENDFACTOR_INV_DST_ALPHA) + src = GEN6_BLENDFACTOR_ZERO; + } + + /* If the source alpha is being used, then we should only be in a + * case where the source blend factor is 0, and the source blend + * value is the mask channels multiplied by the source picture's alpha. + */ + if (has_component_alpha && gen6_blend_op[op].src_alpha) { + if (dst == GEN6_BLENDFACTOR_SRC_ALPHA) + dst = GEN6_BLENDFACTOR_SRC_COLOR; + else if (dst == GEN6_BLENDFACTOR_INV_SRC_ALPHA) + dst = GEN6_BLENDFACTOR_INV_SRC_COLOR; + } + + DBG(("blend op=%d, dst=%x [A=%d] => src=%d, dst=%d => offset=%x\n", + op, dst_format, PICT_FORMAT_A(dst_format), + src, dst, (int)BLEND_OFFSET(src, dst))); + return BLEND_OFFSET(src, dst); +} + +static uint32_t gen6_get_dest_format(PictFormat format) +{ + switch (format) { + default: + assert(0); + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + return GEN6_SURFACEFORMAT_R8G8B8A8_UNORM; + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; + case PICT_r5g6b5: + return GEN6_SURFACEFORMAT_B5G6R5_UNORM; + case PICT_x1r5g5b5: + case PICT_a1r5g5b5: + return GEN6_SURFACEFORMAT_B5G5R5A1_UNORM; + case PICT_a8: + return GEN6_SURFACEFORMAT_A8_UNORM; + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return GEN6_SURFACEFORMAT_B4G4R4A4_UNORM; + } +} + +static Bool gen6_check_dst_format(PictFormat format) +{ + switch (format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + case PICT_a8b8g8r8: + case PICT_x8b8g8r8: + case PICT_a2r10g10b10: + case PICT_x2r10g10b10: + case PICT_r5g6b5: + case PICT_x1r5g5b5: + case PICT_a1r5g5b5: + case PICT_a8: + case PICT_a4r4g4b4: + case PICT_x4r4g4b4: + return TRUE; + } + return FALSE; +} + +static uint32_t gen6_get_dest_format_for_depth(int depth) +{ + switch (depth) { + default: 
assert(0); + case 32: + case 24: return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; + case 30: return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; + case 16: return GEN6_SURFACEFORMAT_B5G6R5_UNORM; + case 8: return GEN6_SURFACEFORMAT_A8_UNORM; + } +} + +static uint32_t gen6_get_card_format_for_depth(int depth) +{ + switch (depth) { + default: assert(0); + case 32: return GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; + case 30: return GEN6_SURFACEFORMAT_B10G10R10A2_UNORM; + case 24: return GEN6_SURFACEFORMAT_B8G8R8X8_UNORM; + case 16: return GEN6_SURFACEFORMAT_B5G6R5_UNORM; + case 8: return GEN6_SURFACEFORMAT_A8_UNORM; + } +} + +static bool gen6_format_is_dst(uint32_t format) +{ + switch (format) { + case GEN6_SURFACEFORMAT_B8G8R8A8_UNORM: + case GEN6_SURFACEFORMAT_R8G8B8A8_UNORM: + case GEN6_SURFACEFORMAT_B10G10R10A2_UNORM: + case GEN6_SURFACEFORMAT_B5G6R5_UNORM: + case GEN6_SURFACEFORMAT_B5G5R5A1_UNORM: + case GEN6_SURFACEFORMAT_A8_UNORM: + case GEN6_SURFACEFORMAT_B4G4R4A4_UNORM: + return true; + default: + return false; + } +} + +static uint32_t gen6_filter(uint32_t filter) +{ + switch (filter) { + default: + assert(0); + case PictFilterNearest: + return SAMPLER_FILTER_NEAREST; + case PictFilterBilinear: + return SAMPLER_FILTER_BILINEAR; + } +} + +static uint32_t gen6_check_filter(PicturePtr picture) +{ + switch (picture->filter) { + case PictFilterNearest: + case PictFilterBilinear: + return TRUE; + default: + return FALSE; + } +} + +static uint32_t gen6_repeat(uint32_t repeat) +{ + switch (repeat) { + default: + assert(0); + case RepeatNone: + return SAMPLER_EXTEND_NONE; + case RepeatNormal: + return SAMPLER_EXTEND_REPEAT; + case RepeatPad: + return SAMPLER_EXTEND_PAD; + case RepeatReflect: + return SAMPLER_EXTEND_REFLECT; + } +} + +static bool gen6_check_repeat(PicturePtr picture) +{ + if (!picture->repeat) + return TRUE; + + switch (picture->repeatType) { + case RepeatNone: + case RepeatNormal: + case RepeatPad: + case RepeatReflect: + return TRUE; + default: + return FALSE; + } +} + 
+static int +gen6_choose_composite_kernel(int op, Bool has_mask, Bool is_ca, Bool is_affine) +{ + int base; + + if (has_mask) { + if (is_ca) { + if (gen6_blend_op[op].src_alpha) + base = GEN6_WM_KERNEL_MASKCA_SRCALPHA; + else + base = GEN6_WM_KERNEL_MASKCA; + } else + base = GEN6_WM_KERNEL_MASK; + } else + base = GEN6_WM_KERNEL_NOMASK; + + return base + !is_affine; +} + +static void +gen6_emit_sip(struct sna *sna) +{ + /* Set system instruction pointer */ + OUT_BATCH(GEN6_STATE_SIP | 0); + OUT_BATCH(0); +} + +static void +gen6_emit_urb(struct sna *sna) +{ + OUT_BATCH(GEN6_3DSTATE_URB | (3 - 2)); + OUT_BATCH(((1 - 1) << GEN6_3DSTATE_URB_VS_SIZE_SHIFT) | + (24 << GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT)); /* at least 24 on GEN6 */ + OUT_BATCH((0 << GEN6_3DSTATE_URB_GS_SIZE_SHIFT) | + (0 << GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT)); /* no GS thread */ +} + +static void +gen6_emit_state_base_address(struct sna *sna) +{ + OUT_BATCH(GEN6_STATE_BASE_ADDRESS | (10 - 2)); + OUT_BATCH(0); /* general */ + OUT_BATCH(kgem_add_reloc(&sna->kgem, /* surface */ + sna->kgem.nbatch, + NULL, + I915_GEM_DOMAIN_INSTRUCTION << 16, + BASE_ADDRESS_MODIFY)); + OUT_BATCH(kgem_add_reloc(&sna->kgem, /* instruction */ + sna->kgem.nbatch, + sna->render_state.gen6.general_bo, + I915_GEM_DOMAIN_INSTRUCTION << 16, + BASE_ADDRESS_MODIFY)); + OUT_BATCH(0); /* indirect */ + OUT_BATCH(kgem_add_reloc(&sna->kgem, + sna->kgem.nbatch, + sna->render_state.gen6.general_bo, + I915_GEM_DOMAIN_INSTRUCTION << 16, + BASE_ADDRESS_MODIFY)); + + /* upper bounds, disable */ + OUT_BATCH(0); + OUT_BATCH(BASE_ADDRESS_MODIFY); + OUT_BATCH(0); + OUT_BATCH(BASE_ADDRESS_MODIFY); +} + +static void +gen6_emit_viewports(struct sna *sna) +{ + OUT_BATCH(GEN6_3DSTATE_VIEWPORT_STATE_POINTERS | + GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC | + (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(sna->render_state.gen6.cc_vp); +} + +static void +gen6_emit_vs(struct sna *sna) +{ + /* disable VS constant buffer */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_VS 
| (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_VS | (6 - 2)); + OUT_BATCH(0); /* no VS kernel */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ +} + +static void +gen6_emit_gs(struct sna *sna) +{ + /* disable GS constant buffer */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_GS | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_GS | (7 - 2)); + OUT_BATCH(0); /* no GS kernel */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ +} + +static void +gen6_emit_clip(struct sna *sna) +{ + OUT_BATCH(GEN6_3DSTATE_CLIP | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); /* pass-through */ + OUT_BATCH(0); +} + +static void +gen6_emit_wm_constants(struct sna *sna) +{ + /* disable WM constant buffer */ + OUT_BATCH(GEN6_3DSTATE_CONSTANT_PS | (5 - 2)); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static void +gen6_emit_null_depth_buffer(struct sna *sna) +{ + OUT_BATCH(GEN6_3DSTATE_DEPTH_BUFFER | (7 - 2)); + OUT_BATCH(GEN6_SURFACE_NULL << GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT | + GEN6_DEPTHFORMAT_D32_FLOAT << GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_CLEAR_PARAMS | (2 - 2)); + OUT_BATCH(0); +} + +static void +gen6_emit_invariant(struct sna *sna) +{ + OUT_BATCH(GEN6_PIPELINE_SELECT | PIPELINE_SELECT_3D); + + OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE | (3 - 2)); + OUT_BATCH(GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER | + GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1); /* 1 sample/pixel */ + OUT_BATCH(0); + + OUT_BATCH(GEN6_3DSTATE_SAMPLE_MASK | (2 - 2)); + OUT_BATCH(1); + + gen6_emit_sip(sna); + gen6_emit_urb(sna); + + gen6_emit_state_base_address(sna); + + gen6_emit_viewports(sna); + gen6_emit_vs(sna); + gen6_emit_gs(sna); + gen6_emit_clip(sna); + gen6_emit_wm_constants(sna); + 
gen6_emit_null_depth_buffer(sna); + + sna->render_state.gen6.needs_invariant = FALSE; +} + +static void +gen6_emit_cc(struct sna *sna, uint32_t blend_offset) +{ + struct gen6_render_state *render = &sna->render_state.gen6; + + if (render->blend == blend_offset) + return; + + OUT_BATCH(GEN6_3DSTATE_CC_STATE_POINTERS | (4 - 2)); + OUT_BATCH((render->cc_blend + blend_offset) | 1); + if (render->blend == -1) { + OUT_BATCH(1); + OUT_BATCH(1); + } else { + OUT_BATCH(0); + OUT_BATCH(0); + } + + render->blend = blend_offset; +} + +static void +gen6_emit_sampler(struct sna *sna, uint32_t state) +{ + assert(state < + 2 * sizeof(struct gen6_sampler_state) * + FILTER_COUNT * EXTEND_COUNT * + FILTER_COUNT * EXTEND_COUNT); + + if (sna->render_state.gen6.samplers == state) + return; + + sna->render_state.gen6.samplers = state; + + OUT_BATCH(GEN6_3DSTATE_SAMPLER_STATE_POINTERS | + GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS | + (4 - 2)); + OUT_BATCH(0); /* VS */ + OUT_BATCH(0); /* GS */ + OUT_BATCH(sna->render_state.gen6.wm_state + state); +} + +static void +gen6_emit_sf(struct sna *sna, Bool has_mask) +{ + int num_sf_outputs = has_mask ? 
2 : 1; + + if (sna->render_state.gen6.num_sf_outputs == num_sf_outputs) + return; + + DBG(("%s: num_sf_outputs=%d, read_length=%d, read_offset=%d\n", + __FUNCTION__, num_sf_outputs, 1, 0)); + + sna->render_state.gen6.num_sf_outputs = num_sf_outputs; + + OUT_BATCH(GEN6_3DSTATE_SF | (20 - 2)); + OUT_BATCH(num_sf_outputs << GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT | + 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT | + 1 << GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT); + OUT_BATCH(0); + OUT_BATCH(GEN6_3DSTATE_SF_CULL_NONE); + OUT_BATCH(2 << GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT); /* DW4 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW9 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW14 */ + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); /* DW19 */ +} + +static void +gen6_emit_wm(struct sna *sna, int kernel, int nr_surfaces, int nr_inputs) +{ + if (sna->render_state.gen6.kernel == kernel) + return; + + sna->render_state.gen6.kernel = kernel; + + DBG(("%s: switching to %s\n", __FUNCTION__, wm_kernels[kernel].name)); + + OUT_BATCH(GEN6_3DSTATE_WM | (9 - 2)); + OUT_BATCH(sna->render_state.gen6.wm_kernel[kernel]); + OUT_BATCH(1 << GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT | + nr_surfaces << GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT); + OUT_BATCH(0); + OUT_BATCH(6 << GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT); /* DW4 */ + OUT_BATCH((40 - 1) << GEN6_3DSTATE_WM_MAX_THREADS_SHIFT | + GEN6_3DSTATE_WM_DISPATCH_ENABLE | + GEN6_3DSTATE_WM_16_DISPATCH_ENABLE); + OUT_BATCH(nr_inputs << GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT | + GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC); + OUT_BATCH(0); + OUT_BATCH(0); +} + +static bool +gen6_emit_binding_table(struct sna *sna, uint16_t offset) +{ + if (sna->render_state.gen6.surface_table == offset) + return false; + + /* Binding table pointers */ + OUT_BATCH(GEN6_3DSTATE_BINDING_TABLE_POINTERS | + GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS | + (4 - 
2)); + OUT_BATCH(0); /* vs */ + OUT_BATCH(0); /* gs */ + /* Only the PS uses the binding table */ + OUT_BATCH(offset*4); + + sna->render_state.gen6.surface_table = offset; + return true; +} + +static void +gen6_emit_drawing_rectangle(struct sna *sna, + const struct sna_composite_op *op, + bool force) +{ + uint32_t limit = (op->dst.height - 1) << 16 | (op->dst.width - 1); + uint32_t offset = (uint16_t)op->dst.y << 16 | (uint16_t)op->dst.x; + + if (!force && + sna->render_state.gen6.drawrect_limit == limit && + sna->render_state.gen6.drawrect_offset == offset) + return; + + sna->render_state.gen6.drawrect_offset = offset; + sna->render_state.gen6.drawrect_limit = limit; + + OUT_BATCH(GEN6_3DSTATE_DRAWING_RECTANGLE | (4 - 2)); + OUT_BATCH(0); + OUT_BATCH(limit); + OUT_BATCH(offset); +} + +static void +gen6_emit_vertex_elements(struct sna *sna, + const struct sna_composite_op *op) +{ + /* + * vertex data in vertex buffer + * position: (x, y) + * texture coordinate 0: (u0, v0) if (is_affine is TRUE) else (u0, v0, w0) + * texture coordinate 1 if (has_mask is TRUE): same as above + */ + struct gen6_render_state *render = &sna->render_state.gen6; + int nelem = op->mask.bo ? 2 : 1; + int selem = op->is_affine ? 2 : 3; + uint32_t w_component; + uint32_t src_format; + int id = op->u.gen6.ve_id; + + if (render->ve_id == id) + return; + render->ve_id = id; + + if (op->is_affine) { + src_format = GEN6_SURFACEFORMAT_R32G32_FLOAT; + w_component = GEN6_VFCOMPONENT_STORE_1_FLT; + } else { + src_format = GEN6_SURFACEFORMAT_R32G32B32_FLOAT; + w_component = GEN6_VFCOMPONENT_STORE_SRC; + } + + /* The VUE layout + * dword 0-3: pad (0.0, 0.0, 0.0. 
0.0) + * dword 4-7: position (x, y, 1.0, 1.0), + * dword 8-11: texture coordinate 0 (u0, v0, w0, 1.0) + * dword 12-15: texture coordinate 1 (u1, v1, w1, 1.0) + * + * dword 4-15 are fetched from vertex buffer + */ + OUT_BATCH(GEN6_3DSTATE_VERTEX_ELEMENTS | + ((2 * (2 + nelem)) + 1 - 2)); + + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT << VE0_FORMAT_SHIFT | + 0 << VE0_OFFSET_SHIFT); + OUT_BATCH(GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_0_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_1_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_2_SHIFT | + GEN6_VFCOMPONENT_STORE_0 << VE1_VFCOMPONENT_3_SHIFT); + + /* x,y */ + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + GEN6_SURFACEFORMAT_R16G16_SSCALED << VE0_FORMAT_SHIFT | + 0 << VE0_OFFSET_SHIFT); /* offsets vb in bytes */ + OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | + GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | + GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_2_SHIFT | + GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); + + /* u0, v0, w0 */ + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + src_format << VE0_FORMAT_SHIFT | + 4 << VE0_OFFSET_SHIFT); /* offset vb in bytes */ + OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | + GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | + w_component << VE1_VFCOMPONENT_2_SHIFT | + GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); + + /* u1, v1, w1 */ + if (op->mask.bo) { + OUT_BATCH(id << VE0_VERTEX_BUFFER_INDEX_SHIFT | VE0_VALID | + src_format << VE0_FORMAT_SHIFT | + ((1 + selem) * 4) << VE0_OFFSET_SHIFT); /* vb offset in bytes */ + OUT_BATCH(GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_0_SHIFT | + GEN6_VFCOMPONENT_STORE_SRC << VE1_VFCOMPONENT_1_SHIFT | + w_component << VE1_VFCOMPONENT_2_SHIFT | + GEN6_VFCOMPONENT_STORE_1_FLT << VE1_VFCOMPONENT_3_SHIFT); + } +} + +static void +gen6_emit_state(struct sna *sna, + 
const struct sna_composite_op *op, + uint16_t wm_binding_table) + +{ + bool need_flush = + (sna->kgem.batch[sna->kgem.nbatch-1] & (0xff<<23)) != MI_FLUSH; + + gen6_emit_cc(sna, + gen6_get_blend(op->op, + op->has_component_alpha, + op->dst.format)); + + DBG(("%s: sampler src=(%d, %d), mask=(%d, %d), offset=%d\n", + __FUNCTION__, + op->src.filter, op->src.repeat, + op->mask.filter, op->mask.repeat, + (int)SAMPLER_OFFSET(op->src.filter, + op->src.repeat, + op->mask.filter, + op->mask.repeat))); + gen6_emit_sampler(sna, + SAMPLER_OFFSET(op->src.filter, + op->src.repeat, + op->mask.filter, + op->mask.repeat)); + gen6_emit_sf(sna, op->mask.bo != NULL); + gen6_emit_wm(sna, + op->u.gen6.wm_kernel, + op->u.gen6.nr_surfaces, + op->u.gen6.nr_inputs); + gen6_emit_vertex_elements(sna, op); + + /* XXX updating the binding table requires a non-pipelined cmd? */ + need_flush &= gen6_emit_binding_table(sna, wm_binding_table); + gen6_emit_drawing_rectangle(sna, op, need_flush); +} + +static void gen6_magic_ca_pass(struct sna *sna, + const struct sna_composite_op *op) +{ + struct gen6_render_state *state = &sna->render_state.gen6; + + if (!op->need_magic_ca_pass) + return; + + DBG(("%s: CA fixup (%d -> %d)\n", __FUNCTION__, + sna->render.vertex_start, sna->render.vertex_index)); + + gen6_emit_cc(sna, + gen6_get_blend(PictOpAdd, TRUE, op->dst.format)); + gen6_emit_wm(sna, + gen6_choose_composite_kernel(PictOpAdd, + TRUE, TRUE, + op->is_affine), + 3, 2); + + OUT_BATCH(GEN6_3DPRIMITIVE | + GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | + _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | + 0 << 9 | + 4); + OUT_BATCH(sna->render.vertex_index - sna->render.vertex_start); + OUT_BATCH(sna->render.vertex_start); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + + state->last_primitive = sna->kgem.nbatch; +} + +static void gen6_vertex_flush(struct sna *sna) +{ + if (sna->render_state.gen6.vertex_offset == 0) + 
return; + + DBG(("%s[%x] = %d\n", __FUNCTION__, + 4*sna->render_state.gen6.vertex_offset, + sna->render.vertex_index - sna->render.vertex_start)); + sna->kgem.batch[sna->render_state.gen6.vertex_offset] = + sna->render.vertex_index - sna->render.vertex_start; + sna->render_state.gen6.vertex_offset = 0; + + if (sna->render.op) + gen6_magic_ca_pass(sna, sna->render.op); +} + +static void gen6_vertex_finish(struct sna *sna, Bool last) +{ + struct kgem_bo *bo; + int i, delta; + + gen6_vertex_flush(sna); + if (!sna->render.vertex_used) + return; + + /* Note: we only need dword alignment (currently) */ + + if (last && sna->kgem.nbatch + sna->render.vertex_used <= sna->kgem.surface) { + DBG(("%s: copy to batch: %d @ %d\n", __FUNCTION__, + sna->render.vertex_used, sna->kgem.nbatch)); + memcpy(sna->kgem.batch + sna->kgem.nbatch, + sna->render.vertex_data, + sna->render.vertex_used * 4); + delta = sna->kgem.nbatch * 4; + bo = NULL; + sna->kgem.nbatch += sna->render.vertex_used; + } else { + bo = kgem_create_linear(&sna->kgem, 4*sna->render.vertex_used); + if (bo && !kgem_bo_write(&sna->kgem, bo, + sna->render.vertex_data, + 4*sna->render.vertex_used)) { + kgem_bo_destroy(&sna->kgem, bo); + return; + } + delta = 0; + DBG(("%s: new vbo: %d\n", __FUNCTION__, + sna->render.vertex_used)); + } + + for (i = 0; i < ARRAY_SIZE(sna->render.vertex_reloc); i++) { + if (sna->render.vertex_reloc[i]) { + DBG(("%s: reloc[%d] = %d\n", __FUNCTION__, + i, sna->render.vertex_reloc[i])); + + sna->kgem.batch[sna->render.vertex_reloc[i]] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i], + bo, + I915_GEM_DOMAIN_VERTEX << 16, + delta); + sna->kgem.batch[sna->render.vertex_reloc[i]+1] = + kgem_add_reloc(&sna->kgem, + sna->render.vertex_reloc[i]+1, + bo, + I915_GEM_DOMAIN_VERTEX << 16, + delta + sna->render.vertex_used * 4 - 1); + sna->render.vertex_reloc[i] = 0; + } + } + + if (bo) + kgem_bo_destroy(&sna->kgem, bo); + + sna->render.vertex_used = 0; + sna->render.vertex_index = 0; + 
sna->render_state.gen6.vb_id = 0; +} + +typedef struct gen6_surface_state_padded { + struct gen6_surface_state state; + char pad[32 - sizeof(struct gen6_surface_state)]; +} gen6_surface_state_padded; + +static void null_create(struct sna_static_stream *stream) +{ + /* A bunch of zeros useful for legacy border color and depth-stencil */ + sna_static_stream_map(stream, 64, 64); +} + +static void +sampler_state_init(struct gen6_sampler_state *sampler_state, + sampler_filter_t filter, + sampler_extend_t extend) +{ + sampler_state->ss0.lod_preclamp = 1; /* GL mode */ + + /* We use the legacy mode to get the semantics specified by + * the Render extension. */ + sampler_state->ss0.border_color_mode = GEN6_BORDER_COLOR_MODE_LEGACY; + + switch (filter) { + default: + case SAMPLER_FILTER_NEAREST: + sampler_state->ss0.min_filter = GEN6_MAPFILTER_NEAREST; + sampler_state->ss0.mag_filter = GEN6_MAPFILTER_NEAREST; + break; + case SAMPLER_FILTER_BILINEAR: + sampler_state->ss0.min_filter = GEN6_MAPFILTER_LINEAR; + sampler_state->ss0.mag_filter = GEN6_MAPFILTER_LINEAR; + break; + } + + switch (extend) { + default: + case SAMPLER_EXTEND_NONE: + sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; + sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP_BORDER; + break; + case SAMPLER_EXTEND_REPEAT: + sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_WRAP; + sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_WRAP; + sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_WRAP; + break; + case SAMPLER_EXTEND_PAD: + sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; + sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; + sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_CLAMP; + break; + case SAMPLER_EXTEND_REFLECT: + sampler_state->ss1.r_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; + sampler_state->ss1.s_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; + sampler_state->ss1.t_wrap_mode = GEN6_TEXCOORDMODE_MIRROR; 
+ break; + } +} + +static uint32_t gen6_create_cc_viewport(struct sna_static_stream *stream) +{ + struct gen6_cc_viewport vp; + + vp.min_depth = -1.e35; + vp.max_depth = 1.e35; + + return sna_static_stream_add(stream, &vp, sizeof(vp), 32); +} + +static uint32_t gen6_get_card_format(PictFormat format) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(gen6_tex_formats); i++) { + if (gen6_tex_formats[i].pict_fmt == format) + return gen6_tex_formats[i].card_fmt; + } + return -1; +} + +static uint32_t +gen6_tiling_bits(uint32_t tiling) +{ + switch (tiling) { + default: assert(0); + case I915_TILING_NONE: return 0; + case I915_TILING_X: return GEN6_SURFACE_TILED; + case I915_TILING_Y: return GEN6_SURFACE_TILED | GEN6_SURFACE_TILED_Y; + } +} + +/** + * Sets up the common fields for a surface state buffer for the given + * picture in the given surface state buffer. + */ +static int +gen6_bind_bo(struct sna *sna, + struct kgem_bo *bo, + uint32_t width, + uint32_t height, + uint32_t format, + Bool is_dst) +{ + uint32_t *ss; + uint32_t domains; + uint16_t offset; + + /* After the first bind, we manage the cache domains within the batch */ + if (is_dst) { + domains = I915_GEM_DOMAIN_RENDER << 16 |I915_GEM_DOMAIN_RENDER; + kgem_bo_mark_dirty(bo); + } else { + domains = I915_GEM_DOMAIN_SAMPLER << 16; + is_dst = gen6_format_is_dst(format); + } + + offset = sna->kgem.surface - sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + offset *= sizeof(uint32_t); + + if (is_dst) { + if (bo->dst_bound) + return bo->dst_bound; + + bo->dst_bound = offset; + } else { + if (bo->src_bound) + return bo->src_bound; + + bo->src_bound = offset; + } + + sna->kgem.surface -= + sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + ss = sna->kgem.batch + sna->kgem.surface; + ss[0] = (GEN6_SURFACE_2D << GEN6_SURFACE_TYPE_SHIFT | + GEN6_SURFACE_BLEND_ENABLED | + format << GEN6_SURFACE_FORMAT_SHIFT); + ss[1] = kgem_add_reloc(&sna->kgem, + sna->kgem.surface + 1, + bo, domains, 0); + ss[2] = 
((width - 1) << GEN6_SURFACE_WIDTH_SHIFT | + (height - 1) << GEN6_SURFACE_HEIGHT_SHIFT); + ss[3] = (gen6_tiling_bits(bo->tiling) | + (bo->pitch - 1) << GEN6_SURFACE_PITCH_SHIFT); + ss[4] = 0; + ss[5] = 0; + + DBG(("[%x] bind bo(handle=%d, addr=%d), format=%d, width=%d, height=%d, pitch=%d, tiling=%d -> %s\n", + offset, bo->handle, ss[1], + format, width, height, bo->pitch, bo->tiling, + domains & 0xffff ? "render" : "sampler")); + + return offset; +} + +fastcall static void +gen6_emit_composite_primitive_solid(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float *v; + union { + struct sna_coordinate p; + float f; + } dst; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = 1.; + v[2] = 1.; + + dst.p.x = r->dst.x; + v[3] = dst.f; + v[4] = 0.; + v[5] = 1.; + + dst.p.y = r->dst.y; + v[6] = dst.f; + v[7] = 0.; + v[8] = 0.; +} + +fastcall static void +gen6_emit_composite_primitive_identity_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + dst.p.x = r->dst.x; + v[3] = dst.f; + dst.p.y = r->dst.y; + v[6] = dst.f; + + v[7] = v[4] = (r->src.x + op->src.offset[0]) * op->src.scale[0]; + v[1] = v[4] + r->width * op->src.scale[0]; + + v[8] = (r->src.y + op->src.offset[1]) * op->src.scale[1]; + v[5] = v[2] = v[8] + r->height * op->src.scale[1]; +} + +fastcall static void +gen6_emit_composite_primitive_affine_source(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float *v; + + v = 
sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 9; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x + r->width, + op->src.offset[1] + r->src.y + r->height, + op->src.transform, + &v[1], &v[2]); + v[1] *= op->src.scale[0]; + v[2] *= op->src.scale[1]; + + dst.p.x = r->dst.x; + v[3] = dst.f; + _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, + op->src.offset[1] + r->src.y + r->height, + op->src.transform, + &v[4], &v[5]); + v[4] *= op->src.scale[0]; + v[5] *= op->src.scale[1]; + + dst.p.y = r->dst.y; + v[6] = dst.f; + _sna_get_transformed_coordinates(op->src.offset[0] + r->src.x, + op->src.offset[1] + r->src.y, + op->src.transform, + &v[7], &v[8]); + v[7] *= op->src.scale[0]; + v[8] *= op->src.scale[1]; +} + +fastcall static void +gen6_emit_composite_primitive_identity_source_mask(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + union { + struct sna_coordinate p; + float f; + } dst; + float src_x, src_y; + float msk_x, msk_y; + float w, h; + float *v; + + src_x = r->src.x + op->src.offset[0]; + src_y = r->src.y + op->src.offset[1]; + msk_x = r->mask.x + op->mask.offset[0]; + msk_y = r->mask.y + op->mask.offset[1]; + w = r->width; + h = r->height; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 15; + + dst.p.x = r->dst.x + r->width; + dst.p.y = r->dst.y + r->height; + v[0] = dst.f; + v[1] = (src_x + w) * op->src.scale[0]; + v[2] = (src_y + h) * op->src.scale[1]; + v[3] = (msk_x + w) * op->mask.scale[0]; + v[4] = (msk_y + h) * op->mask.scale[1]; + + dst.p.x = r->dst.x; + v[5] = dst.f; + v[6] = src_x * op->src.scale[0]; + v[7] = v[2]; + v[8] = msk_x * op->mask.scale[0]; + v[9] = v[4]; + + dst.p.y = r->dst.y; + v[10] = dst.f; + v[11] = v[6]; + v[12] = src_y * op->src.scale[1]; + v[13] = v[8]; + v[14] = msk_y * op->mask.scale[1]; +} + 
+fastcall static void +gen6_emit_composite_primitive(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + float src_x[3], src_y[3], src_w[3], mask_x[3], mask_y[3], mask_w[3]; + Bool is_affine = op->is_affine; + const float *src_sf = op->src.scale; + const float *mask_sf = op->mask.scale; + + if (is_affine) { + sna_get_transformed_coordinates(r->src.x + op->src.offset[0], + r->src.y + op->src.offset[1], + op->src.transform, + &src_x[0], + &src_y[0]); + + sna_get_transformed_coordinates(r->src.x + op->src.offset[0], + r->src.y + op->src.offset[1] + r->height, + op->src.transform, + &src_x[1], + &src_y[1]); + + sna_get_transformed_coordinates(r->src.x + op->src.offset[0] + r->width, + r->src.y + op->src.offset[1] + r->height, + op->src.transform, + &src_x[2], + &src_y[2]); + } else { + if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0], + r->src.y + op->src.offset[1], + op->src.transform, + &src_x[0], + &src_y[0], + &src_w[0])) + return; + + if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0], + r->src.y + op->src.offset[1] + r->height, + op->src.transform, + &src_x[1], + &src_y[1], + &src_w[1])) + return; + + if (!sna_get_transformed_coordinates_3d(r->src.x + op->src.offset[0] + r->width, + r->src.y + op->src.offset[1] + r->height, + op->src.transform, + &src_x[2], + &src_y[2], + &src_w[2])) + return; + } + + if (op->mask.bo) { + if (is_affine) { + sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0], + r->mask.y + op->mask.offset[1], + op->mask.transform, + &mask_x[0], + &mask_y[0]); + + sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0], + r->mask.y + op->mask.offset[1] + r->height, + op->mask.transform, + &mask_x[1], + &mask_y[1]); + + sna_get_transformed_coordinates(r->mask.x + op->mask.offset[0] + r->width, + r->mask.y + op->mask.offset[1] + r->height, + op->mask.transform, + &mask_x[2], + &mask_y[2]); + } else { + if 
(!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0], + r->mask.y + op->mask.offset[1], + op->mask.transform, + &mask_x[0], + &mask_y[0], + &mask_w[0])) + return; + + if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0], + r->mask.y + op->mask.offset[1] + r->height, + op->mask.transform, + &mask_x[1], + &mask_y[1], + &mask_w[1])) + return; + + if (!sna_get_transformed_coordinates_3d(r->mask.x + op->mask.offset[0] + r->width, + r->mask.y + op->mask.offset[1] + r->height, + op->mask.transform, + &mask_x[2], + &mask_y[2], + &mask_w[2])) + return; + } + } + + OUT_VERTEX(r->dst.x + r->width, r->dst.y + r->height); + OUT_VERTEX_F(src_x[2] * src_sf[0]); + OUT_VERTEX_F(src_y[2] * src_sf[1]); + if (!is_affine) + OUT_VERTEX_F(src_w[2]); + if (op->mask.bo) { + OUT_VERTEX_F(mask_x[2] * mask_sf[0]); + OUT_VERTEX_F(mask_y[2] * mask_sf[1]); + if (!is_affine) + OUT_VERTEX_F(mask_w[2]); + } + + OUT_VERTEX(r->dst.x, r->dst.y + r->height); + OUT_VERTEX_F(src_x[1] * src_sf[0]); + OUT_VERTEX_F(src_y[1] * src_sf[1]); + if (!is_affine) + OUT_VERTEX_F(src_w[1]); + if (op->mask.bo) { + OUT_VERTEX_F(mask_x[1] * mask_sf[0]); + OUT_VERTEX_F(mask_y[1] * mask_sf[1]); + if (!is_affine) + OUT_VERTEX_F(mask_w[1]); + } + + OUT_VERTEX(r->dst.x, r->dst.y); + OUT_VERTEX_F(src_x[0] * src_sf[0]); + OUT_VERTEX_F(src_y[0] * src_sf[1]); + if (!is_affine) + OUT_VERTEX_F(src_w[0]); + if (op->mask.bo) { + OUT_VERTEX_F(mask_x[0] * mask_sf[0]); + OUT_VERTEX_F(mask_y[0] * mask_sf[1]); + if (!is_affine) + OUT_VERTEX_F(mask_w[0]); + } +} + +static void gen6_emit_vertex_buffer(struct sna *sna, + const struct sna_composite_op *op) +{ + int id = op->u.gen6.ve_id; + + OUT_BATCH(GEN6_3DSTATE_VERTEX_BUFFERS | 3); + OUT_BATCH(id << VB0_BUFFER_INDEX_SHIFT | VB0_VERTEXDATA | + 4*op->floats_per_vertex << VB0_BUFFER_PITCH_SHIFT); + sna->render.vertex_reloc[id] = sna->kgem.nbatch; + OUT_BATCH(0); + OUT_BATCH(0); + OUT_BATCH(0); + + sna->render_state.gen6.vb_id |= 1 << id; +} + +static void 
gen6_emit_primitive(struct sna *sna) +{ + if (sna->kgem.nbatch == sna->render_state.gen6.last_primitive) { + sna->render_state.gen6.vertex_offset = sna->kgem.nbatch - 5; + return; + } + + OUT_BATCH(GEN6_3DPRIMITIVE | + GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL | + _3DPRIM_RECTLIST << GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT | + 0 << 9 | + 4); + sna->render_state.gen6.vertex_offset = sna->kgem.nbatch; + OUT_BATCH(0); /* vertex count, to be filled in later */ + OUT_BATCH(sna->render.vertex_index); + OUT_BATCH(1); /* single instance */ + OUT_BATCH(0); /* start instance location */ + OUT_BATCH(0); /* index buffer offset, ignored */ + sna->render.vertex_start = sna->render.vertex_index; + + sna->render_state.gen6.last_primitive = sna->kgem.nbatch; +} + +static bool gen6_rectangle_begin(struct sna *sna, + const struct sna_composite_op *op) +{ + int id = op->u.gen6.ve_id; + int ndwords; + + ndwords = 0; + if ((sna->render_state.gen6.vb_id & (1 << id)) == 0) + ndwords += 5; + if (sna->render_state.gen6.vertex_offset == 0) + ndwords += op->need_magic_ca_pass ? 60 : 6; + if (ndwords == 0) + return true; + + if (!kgem_check_batch(&sna->kgem, ndwords)) + return false; + + if ((sna->render_state.gen6.vb_id & (1 << id)) == 0) + gen6_emit_vertex_buffer(sna, op); + if (sna->render_state.gen6.vertex_offset == 0) + gen6_emit_primitive(sna); + + return true; +} + +static int gen6_get_rectangles__flush(struct sna *sna, bool ca) +{ + if (!kgem_check_batch(&sna->kgem, ca ? 
65 : 5)) + return 0; + if (sna->kgem.nexec > KGEM_EXEC_SIZE(&sna->kgem) - 1) + return 0; + if (sna->kgem.nreloc > KGEM_RELOC_SIZE(&sna->kgem) - 1) + return 0; + + gen6_vertex_finish(sna, FALSE); + sna->render.vertex_index = 0; + + return ARRAY_SIZE(sna->render.vertex_data); +} + +inline static int gen6_get_rectangles(struct sna *sna, + const struct sna_composite_op *op, + int want) +{ + int rem = vertex_space(sna); + + if (rem < 3*op->floats_per_vertex) { + DBG(("flushing vbo for %s: %d < %d\n", + __FUNCTION__, rem, 3*op->floats_per_vertex)); + rem = gen6_get_rectangles__flush(sna, op->need_magic_ca_pass); + if (rem == 0) + return 0; + } + + if (!gen6_rectangle_begin(sna, op)) + return 0; + + if (want > 1 && want * op->floats_per_vertex*3 > rem) + want = rem / (3*op->floats_per_vertex); + + sna->render.vertex_index += 3*want; + return want; +} + +inline static uint32_t *gen6_composite_get_binding_table(struct sna *sna, + const struct sna_composite_op *op, + uint16_t *offset) +{ + uint32_t *table; + + sna->kgem.surface -= + sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + /* Clear all surplus entries to zero in case of prefetch */ + table = memset(sna->kgem.batch + sna->kgem.surface, + 0, sizeof(struct gen6_surface_state_padded)); + + DBG(("%s(%x)\n", __FUNCTION__, 4*sna->kgem.surface)); + + *offset = sna->kgem.surface; + return table; +} + +static uint32_t +gen6_choose_composite_vertex_buffer(struct sna *sna, + const struct sna_composite_op *op) +{ + int has_mask = op->mask.bo != NULL; + int is_affine = op->is_affine; + return has_mask << 1 | is_affine; +} + +static void +gen6_get_batch(struct sna *sna) +{ + kgem_set_mode(&sna->kgem, KGEM_RENDER); + + if (!kgem_check_batch_with_surfaces(&sna->kgem, 150, 4)) { + DBG(("%s: flushing batch: %d < %d+%d\n", + __FUNCTION__, sna->kgem.surface - sna->kgem.nbatch, + 150, 4*8)); + kgem_submit(&sna->kgem); + } + + if (sna->render_state.gen6.needs_invariant) + gen6_emit_invariant(sna); +} + +static void 
gen6_emit_composite_state(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + + gen6_get_batch(sna); + + binding_table = gen6_composite_get_binding_table(sna, op, &offset); + + binding_table[0] = + gen6_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen6_get_dest_format(op->dst.format), + TRUE); + binding_table[1] = + gen6_bind_bo(sna, + op->src.bo, op->src.width, op->src.height, + op->src.card_format, + FALSE); + if (op->mask.bo) { + binding_table[2] = + gen6_bind_bo(sna, + op->mask.bo, + op->mask.width, + op->mask.height, + op->mask.card_format, + FALSE); + } + + if (sna->kgem.surface == offset&& + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table && + (op->mask.bo == NULL || + sna->kgem.batch[sna->render_state.gen6.surface_table+2] == binding_table[2])) { + sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + offset = sna->render_state.gen6.surface_table; + } + + gen6_emit_state(sna, op, offset); +} + +static void +gen6_align_vertex(struct sna *sna, const struct sna_composite_op *op) +{ + if (op->floats_per_vertex != sna->render_state.gen6.floats_per_vertex) { + DBG(("aligning vertex: was %d, now %d floats per vertex, %d->%d\n", + sna->render_state.gen6.floats_per_vertex, + op->floats_per_vertex, + sna->render.vertex_index, + (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex)); + sna->render.vertex_index = (sna->render.vertex_used + op->floats_per_vertex - 1) / op->floats_per_vertex; + sna->render.vertex_used = sna->render.vertex_index * op->floats_per_vertex; + sna->render_state.gen6.floats_per_vertex = op->floats_per_vertex; + } +} + +fastcall static void +gen6_render_composite_blt(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + if (!gen6_get_rectangles(sna, op, 1)) { + gen6_emit_composite_state(sna, op); + gen6_get_rectangles(sna, op, 
1); + } + + op->prim_emit(sna, op, r); +} + +static void +gen6_render_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("composite_boxes(%d)\n", nbox)); + + do { + int nbox_this_time = gen6_get_rectangles(sna, op, nbox); + if (nbox_this_time == 0) { + gen6_emit_composite_state(sna, op); + nbox_this_time = gen6_get_rectangles(sna, op, nbox); + } + nbox -= nbox_this_time; + do { + struct sna_composite_rectangles r; + + DBG((" %s: (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + r.src = r.mask = r.dst; + + op->prim_emit(sna, op, &r); + box++; + } while (--nbox_this_time); + } while (nbox); +} + +#ifndef MAX +#define MAX(a,b) ((a) > (b) ? (a) : (b)) +#endif + +static uint32_t +gen6_composite_create_blend_state(struct sna_static_stream *stream) +{ + char *base, *ptr; + int src, dst; + + base = sna_static_stream_map(stream, + GEN6_BLENDFACTOR_COUNT * GEN6_BLENDFACTOR_COUNT * GEN6_BLEND_STATE_PADDED_SIZE, + 64); + + ptr = base; + for (src = 0; src < GEN6_BLENDFACTOR_COUNT; src++) { + for (dst= 0; dst < GEN6_BLENDFACTOR_COUNT; dst++) { + struct gen6_blend_state *blend = + (struct gen6_blend_state *)ptr; + + blend->blend0.dest_blend_factor = dst; + blend->blend0.source_blend_factor = src; + blend->blend0.blend_func = GEN6_BLENDFUNCTION_ADD; + blend->blend0.blend_enable = 1; + + blend->blend1.post_blend_clamp_enable = 1; + blend->blend1.pre_blend_clamp_enable = 1; + + ptr += GEN6_BLEND_STATE_PADDED_SIZE; + } + } + + return sna_static_stream_offsetof(stream, base); +} + +static uint32_t gen6_bind_video_source(struct sna *sna, + struct kgem_bo *src_bo, + uint32_t src_offset, + int src_width, + int src_height, + int src_pitch, + uint32_t src_surf_format) +{ + struct gen6_surface_state *ss; + + sna->kgem.surface -= sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + + ss 
= memset(sna->kgem.batch + sna->kgem.surface, 0, sizeof(*ss)); + ss->ss0.surface_type = GEN6_SURFACE_2D; + ss->ss0.surface_format = src_surf_format; + ss->ss0.color_blend = 1; + + ss->ss1.base_addr = + kgem_add_reloc(&sna->kgem, + sna->kgem.surface + 1, + src_bo, + I915_GEM_DOMAIN_SAMPLER << 16, + src_offset); + + ss->ss2.width = src_width - 1; + ss->ss2.height = src_height - 1; + ss->ss3.pitch = src_pitch - 1; + + return sna->kgem.surface * sizeof(uint32_t); +} + +static void gen6_emit_video_state(struct sna *sna, + struct sna_composite_op *op, + struct sna_video_frame *frame) +{ + uint32_t src_surf_format; + uint32_t src_surf_base[6]; + int src_width[6]; + int src_height[6]; + int src_pitch[6]; + uint32_t *binding_table; + uint16_t offset; + int n_src, n; + + gen6_get_batch(sna); + + src_surf_base[0] = frame->YBufOffset; + src_surf_base[1] = frame->YBufOffset; + src_surf_base[2] = frame->VBufOffset; + src_surf_base[3] = frame->VBufOffset; + src_surf_base[4] = frame->UBufOffset; + src_surf_base[5] = frame->UBufOffset; + + if (is_planar_fourcc(frame->id)) { + src_surf_format = GEN6_SURFACEFORMAT_R8_UNORM; + src_width[1] = src_width[0] = frame->width; + src_height[1] = src_height[0] = frame->height; + src_pitch[1] = src_pitch[0] = frame->pitch[1]; + src_width[4] = src_width[5] = src_width[2] = src_width[3] = + frame->width / 2; + src_height[4] = src_height[5] = src_height[2] = src_height[3] = + frame->height / 2; + src_pitch[4] = src_pitch[5] = src_pitch[2] = src_pitch[3] = + frame->pitch[0]; + n_src = 6; + } else { + if (frame->id == FOURCC_UYVY) + src_surf_format = GEN6_SURFACEFORMAT_YCRCB_SWAPY; + else + src_surf_format = GEN6_SURFACEFORMAT_YCRCB_NORMAL; + + src_width[0] = frame->width; + src_height[0] = frame->height; + src_pitch[0] = frame->pitch[0]; + n_src = 1; + } + + binding_table = gen6_composite_get_binding_table(sna, op, &offset); + + binding_table[0] = + gen6_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + 
gen6_get_dest_format(op->dst.format), + TRUE); + for (n = 0; n < n_src; n++) { + binding_table[1+n] = + gen6_bind_video_source(sna, + frame->bo, + src_surf_base[n], + src_width[n], + src_height[n], + src_pitch[n], + src_surf_format); + } + + gen6_emit_state(sna, op, offset); +} + +static Bool +gen6_render_video(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + short src_w, short src_h, + short drw_w, short drw_h, + PixmapPtr pixmap) +{ + struct sna_composite_op tmp; + int nbox, dxo, dyo, pix_xoff, pix_yoff; + float src_scale_x, src_scale_y; + struct sna_pixmap *priv; + BoxPtr box; + + DBG(("%s: src=(%d, %d), dst=(%d, %d), %dx[(%d, %d), (%d, %d)...]\n", + __FUNCTION__, src_w, src_h, drw_w, drw_h, + REGION_NUM_RECTS(dstRegion), + REGION_EXTENTS(NULL, dstRegion)->x1, + REGION_EXTENTS(NULL, dstRegion)->y1, + REGION_EXTENTS(NULL, dstRegion)->x2, + REGION_EXTENTS(NULL, dstRegion)->y2)); + + priv = sna_pixmap_force_to_gpu(pixmap); + if (priv == NULL) + return FALSE; + + memset(&tmp, 0, sizeof(tmp)); + + tmp.op = PictOpSrc; + tmp.dst.pixmap = pixmap; + tmp.dst.width = pixmap->drawable.width; + tmp.dst.height = pixmap->drawable.height; + tmp.dst.format = sna_format_for_depth(pixmap->drawable.depth); + tmp.dst.bo = priv->gpu_bo; + + tmp.src.filter = SAMPLER_FILTER_BILINEAR; + tmp.src.repeat = SAMPLER_EXTEND_NONE; + + tmp.is_affine = TRUE; + tmp.floats_per_vertex = 3; + + if (is_planar_fourcc(frame->id)) { + tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_VIDEO_PLANAR; + tmp.u.gen6.nr_surfaces = 7; + } else { + tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_VIDEO_PACKED; + tmp.u.gen6.nr_surfaces = 2; + } + tmp.u.gen6.nr_inputs = 1; + tmp.u.gen6.ve_id = 1; + + if (!kgem_check_bo(&sna->kgem, tmp.dst.bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, frame->bo)) + kgem_submit(&sna->kgem); + + if (kgem_bo_is_dirty(frame->bo)) + kgem_emit_flush(&sna->kgem); + + gen6_emit_video_state(sna, &tmp, frame); + gen6_align_vertex(sna, &tmp); + 
+ /* Set up the offset for translating from the given region (in screen + * coordinates) to the backing pixmap. + */ +#ifdef COMPOSITE + pix_xoff = -pixmap->screen_x + pixmap->drawable.x; + pix_yoff = -pixmap->screen_y + pixmap->drawable.y; +#else + pix_xoff = 0; + pix_yoff = 0; +#endif + + dxo = dstRegion->extents.x1; + dyo = dstRegion->extents.y1; + + /* Use normalized texture coordinates */ + src_scale_x = ((float)src_w / frame->width) / (float)drw_w; + src_scale_y = ((float)src_h / frame->height) / (float)drw_h; + + box = REGION_RECTS(dstRegion); + nbox = REGION_NUM_RECTS(dstRegion); + while (nbox--) { + BoxRec r; + + r.x1 = box->x1 + pix_xoff; + r.x2 = box->x2 + pix_xoff; + r.y1 = box->y1 + pix_yoff; + r.y2 = box->y2 + pix_yoff; + + if (!gen6_get_rectangles(sna, &tmp, 1)) { + gen6_emit_video_state(sna, &tmp, frame); + gen6_get_rectangles(sna, &tmp, 1); + } + + OUT_VERTEX(r.x2, r.y2); + OUT_VERTEX_F((box->x2 - dxo) * src_scale_x); + OUT_VERTEX_F((box->y2 - dyo) * src_scale_y); + + OUT_VERTEX(r.x1, r.y2); + OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); + OUT_VERTEX_F((box->y2 - dyo) * src_scale_y); + + OUT_VERTEX(r.x1, r.y1); + OUT_VERTEX_F((box->x1 - dxo) * src_scale_x); + OUT_VERTEX_F((box->y1 - dyo) * src_scale_y); + + sna_damage_add_box(&priv->gpu_damage, &r); + sna_damage_subtract_box(&priv->cpu_damage, &r); + box++; + } + + return TRUE; +} + +static Bool +gen6_composite_solid_init(struct sna *sna, + struct sna_composite_channel *channel, + uint32_t color) +{ + DBG(("%s: color=%x\n", __FUNCTION__, color)); + + channel->filter = PictFilterNearest; + channel->repeat = RepeatNormal; + channel->is_affine = TRUE; + channel->is_solid = TRUE; + channel->transform = NULL; + channel->width = 1; + channel->height = 1; + channel->card_format = GEN6_SURFACEFORMAT_B8G8R8A8_UNORM; + + channel->bo = sna_render_get_solid(sna, color); + + channel->scale[0] = channel->scale[1] = 1; + channel->offset[0] = channel->offset[1] = 0; + return channel->bo != NULL; +} + +static int 
+gen6_composite_picture(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int x, int y, + int w, int h, + int dst_x, int dst_y) +{ + PixmapPtr pixmap; + uint32_t color; + int16_t dx, dy; + + DBG(("%s: (%d, %d)x(%d, %d), dst=(%d, %d)\n", + __FUNCTION__, x, y, w, h, dst_x, dst_y)); + + channel->is_solid = FALSE; + channel->card_format = -1; + + if (sna_picture_is_solid(picture, &color)) + return gen6_composite_solid_init(sna, channel, color); + + if (picture->pDrawable == NULL) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + if (!gen6_check_repeat(picture)) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + if (!gen6_check_filter(picture)) + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + + channel->repeat = picture->repeat ? picture->repeatType : RepeatNone; + channel->filter = picture->filter; + + pixmap = get_drawable_pixmap(picture->pDrawable); + get_drawable_deltas(picture->pDrawable, pixmap, &dx, &dy); + + x += dx + picture->pDrawable->x; + y += dy + picture->pDrawable->y; + + channel->is_affine = sna_transform_is_affine(picture->transform); + if (sna_transform_is_integer_translation(picture->transform, &dx, &dy)) { + DBG(("%s: integer translation (%d, %d), removing\n", + __FUNCTION__, dx, dy)); + x += dx; + y += dy; + channel->transform = NULL; + channel->filter = PictFilterNearest; + } else + channel->transform = picture->transform; + + channel->card_format = gen6_get_card_format(picture->format); + if (channel->card_format == -1) + return sna_render_picture_convert(sna, picture, channel, pixmap, + x, y, w, h, dst_x, dst_y); + + if (pixmap->drawable.width > 8192 || pixmap->drawable.height > 8192) { + DBG(("%s: extracting from pixmap %dx%d\n", __FUNCTION__, + pixmap->drawable.width, pixmap->drawable.height)); + return sna_render_picture_extract(sna, picture, channel, + x, y, w, h, dst_x, dst_y); + } + + return 
sna_render_pixmap_bo(sna, channel, pixmap, + x, y, w, h, dst_x, dst_y); +} + +static void gen6_composite_channel_convert(struct sna_composite_channel *channel) +{ + channel->repeat = gen6_repeat(channel->repeat); + channel->filter = gen6_filter(channel->filter); + if (channel->card_format == -1) + channel->card_format = gen6_get_card_format(channel->pict_format); + assert(channel->card_format != -1); +} + +static void gen6_render_composite_done(struct sna *sna, + const struct sna_composite_op *op) +{ + gen6_vertex_flush(sna); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + sna->render.op = NULL; + + sna_render_composite_redirect_done(sna, op); + + if (op->src.bo) + kgem_bo_destroy(&sna->kgem, op->src.bo); + if (op->mask.bo) + kgem_bo_destroy(&sna->kgem, op->mask.bo); +} + +static Bool +gen6_composite_set_target(struct sna *sna, + struct sna_composite_op *op, + PicturePtr dst) +{ + struct sna_pixmap *priv; + + if (!gen6_check_dst_format(dst->format)) { + DBG(("%s: unsupported target format %08x\n", + __FUNCTION__, dst->format)); + return FALSE; + } + + op->dst.pixmap = get_drawable_pixmap(dst->pDrawable); + op->dst.width = op->dst.pixmap->drawable.width; + op->dst.height = op->dst.pixmap->drawable.height; + op->dst.format = dst->format; + priv = sna_pixmap(op->dst.pixmap); + + op->dst.bo = NULL; + if (priv && priv->gpu_bo == NULL) { + op->dst.bo = priv->cpu_bo; + op->damage = &priv->cpu_damage; + } + if (op->dst.bo == NULL) { + priv = sna_pixmap_force_to_gpu(op->dst.pixmap); + if (priv == NULL) + return FALSE; + + op->dst.bo = priv->gpu_bo; + if (!priv->gpu_only) + op->damage = &priv->gpu_damage; + } + + get_drawable_deltas(dst->pDrawable, op->dst.pixmap, + &op->dst.x, &op->dst.y); + + DBG(("%s: pixmap=%p, format=%08x, size=%dx%d, pitch=%d, delta=(%d,%d)\n", + __FUNCTION__, + op->dst.pixmap, (int)op->dst.format, + op->dst.width, op->dst.height, + op->dst.bo->pitch, + op->dst.x, op->dst.y)); + return TRUE; +} + +static Bool +try_blt(struct sna *sna, PicturePtr dst, int 
width, int height) +{ + if (sna->kgem.mode == KGEM_BLT) { + DBG(("%s: already performing BLT\n", __FUNCTION__)); + return TRUE; + } + + if (width > 8192 || height > 8192) { + DBG(("%s: operation too large for 3D pipe (%d, %d)\n", + __FUNCTION__, width, height)); + return TRUE; + } + + return FALSE; +} + +static Bool +gen6_render_composite(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp) +{ + +#if NO_COMPOSITE + return sna_blt_composite(sna, op, + src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, tmp); +#endif + + DBG(("%s: %dx%d, current mode=%d\n", __FUNCTION__, + width, height, sna->kgem.mode)); + + if (mask == NULL && + try_blt(sna, dst, width, height) && + sna_blt_composite(sna, op, + src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, tmp)) + return TRUE; + + if (op >= ARRAY_SIZE(gen6_blend_op)) + return FALSE; + + if (need_tiling(sna, width, height)) + return sna_tiling_composite(sna, + op, src, mask, dst, + src_x, src_y, + msk_x, msk_y, + dst_x, dst_y, + width, height, + tmp); + + tmp->op = op; + if (!gen6_composite_set_target(sna, tmp, dst)) + return FALSE; + + if (tmp->dst.width > 8192 || tmp->dst.height > 8192) { + if (!sna_render_composite_redirect(sna, tmp, + dst_x, dst_y, width, height)) + return FALSE; + } + + switch (gen6_composite_picture(sna, src, &tmp->src, + src_x, src_y, + width, height, + dst_x, dst_y)) { + case -1: + goto cleanup_dst; + case 0: + gen6_composite_solid_init(sna, &tmp->src, 0); + case 1: + gen6_composite_channel_convert(&tmp->src); + break; + } + + tmp->is_affine = tmp->src.is_affine; + tmp->has_component_alpha = FALSE; + tmp->need_magic_ca_pass = FALSE; + + tmp->mask.bo = NULL; + tmp->mask.filter = SAMPLER_FILTER_NEAREST; + tmp->mask.repeat = SAMPLER_EXTEND_NONE; + + tmp->prim_emit = gen6_emit_composite_primitive; + if 
(mask) { + if (mask->componentAlpha && PICT_FORMAT_RGB(mask->format)) { + tmp->has_component_alpha = TRUE; + + /* Check if it's component alpha that relies on a source alpha and on + * the source value. We can only get one of those into the single + * source value that we get to blend with. + */ + if (gen6_blend_op[op].src_alpha && + (gen6_blend_op[op].src_blend != GEN6_BLENDFACTOR_ZERO)) { + if (op != PictOpOver) + goto cleanup_src; + + tmp->need_magic_ca_pass = TRUE; + tmp->op = PictOpOutReverse; + } + } + + switch (gen6_composite_picture(sna, mask, &tmp->mask, + msk_x, msk_y, + width, height, + dst_x, dst_y)) { + case -1: + goto cleanup_src; + case 0: + gen6_composite_solid_init(sna, &tmp->mask, 0); + case 1: + gen6_composite_channel_convert(&tmp->mask); + break; + } + + tmp->is_affine &= tmp->mask.is_affine; + + if (tmp->src.transform == NULL && tmp->mask.transform == NULL) + tmp->prim_emit = gen6_emit_composite_primitive_identity_source_mask; + + tmp->floats_per_vertex = 5 + 2 * !tmp->is_affine; + } else { + if (tmp->src.is_solid) + tmp->prim_emit = gen6_emit_composite_primitive_solid; + else if (tmp->src.transform == NULL) + tmp->prim_emit = gen6_emit_composite_primitive_identity_source; + else if (tmp->src.is_affine) + tmp->prim_emit = gen6_emit_composite_primitive_affine_source; + + tmp->floats_per_vertex = 3 + !tmp->is_affine; + } + + tmp->u.gen6.wm_kernel = + gen6_choose_composite_kernel(tmp->op, + tmp->mask.bo != NULL, + tmp->has_component_alpha, + tmp->is_affine); + tmp->u.gen6.nr_surfaces = 2 + (tmp->mask.bo != NULL); + tmp->u.gen6.nr_inputs = 1 + (tmp->mask.bo != NULL); + tmp->u.gen6.ve_id = + gen6_choose_composite_vertex_buffer(sna, tmp); + + tmp->blt = gen6_render_composite_blt; + tmp->boxes = gen6_render_composite_boxes; + tmp->done = gen6_render_composite_done; + + if (!kgem_check_bo(&sna->kgem, tmp->dst.bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, tmp->src.bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, 
tmp->mask.bo)) + kgem_submit(&sna->kgem); + + if (kgem_bo_is_dirty(tmp->src.bo) || kgem_bo_is_dirty(tmp->mask.bo)) + kgem_emit_flush(&sna->kgem); + + gen6_emit_composite_state(sna, tmp); + gen6_align_vertex(sna, tmp); + + sna->render.op = tmp; + return TRUE; + +cleanup_src: + if (tmp->src.bo) + kgem_bo_destroy(&sna->kgem, tmp->src.bo); +cleanup_dst: + if (tmp->redirect.real_bo) + kgem_bo_destroy(&sna->kgem, tmp->dst.bo); + return FALSE; +} + +static void +gen6_emit_copy_state(struct sna *sna, + const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + + gen6_get_batch(sna); + + binding_table = gen6_composite_get_binding_table(sna, op, &offset); + + binding_table[0] = + gen6_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen6_get_dest_format_for_depth(op->dst.pixmap->drawable.depth), + TRUE); + binding_table[1] = + gen6_bind_bo(sna, + op->src.bo, op->src.width, op->src.height, + op->src.card_format, + FALSE); + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { + sna->kgem.surface += sizeof(struct gen6_surface_state_padded) / sizeof(uint32_t); + offset = sna->render_state.gen6.surface_table; + } + + gen6_emit_state(sna, op, offset); +} + +static Bool +gen6_render_copy_boxes(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n) +{ + struct sna_composite_op tmp; + +#if NO_COPY_BOXES + return sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n); +#endif + + DBG(("%s (%d, %d)->(%d, %d) x %d, alu=%x, self-copy=%d\n", + __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, n, alu, + src_bo == dst_bo)); + + if (sna->kgem.mode == KGEM_BLT && + sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + 
dst->drawable.bitsPerPixel, + box, n)) + return TRUE; + + if (!(alu == GXcopy || alu == GXclear) || src_bo == dst_bo || + src->drawable.width > 8192 || src->drawable.height > 8192 || + dst->drawable.width > 8192 || dst->drawable.height > 8192) + return sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n); + tmp.op = alu == GXcopy ? PictOpSrc : PictOpClear; + + tmp.dst.pixmap = dst; + tmp.dst.x = tmp.dst.y = 0; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = dst->drawable.height; + tmp.dst.format = sna_format_for_depth(dst->drawable.depth); + tmp.dst.bo = dst_bo; + + tmp.src.bo = src_bo; + tmp.src.filter = SAMPLER_FILTER_NEAREST; + tmp.src.repeat = SAMPLER_EXTEND_NONE; + tmp.src.card_format = + gen6_get_card_format_for_depth(src->drawable.depth), + tmp.src.width = src->drawable.width; + tmp.src.height = src->drawable.height; + + tmp.mask.bo = NULL; + tmp.mask.filter = SAMPLER_FILTER_NEAREST; + tmp.mask.repeat = SAMPLER_EXTEND_NONE; + + tmp.is_affine = TRUE; + tmp.floats_per_vertex = 3; + + tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK; + tmp.u.gen6.nr_surfaces = 2; + tmp.u.gen6.nr_inputs = 1; + tmp.u.gen6.ve_id = 1; + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, src_bo)) + kgem_submit(&sna->kgem); + + if (kgem_bo_is_dirty(src_bo)) + kgem_emit_flush(&sna->kgem); + + gen6_emit_copy_state(sna, &tmp); + gen6_align_vertex(sna, &tmp); + + tmp.src.scale[0] = 1.f / src->drawable.width; + tmp.src.scale[1] = 1.f / src->drawable.height; + do { + float *v; + int n_this_time = gen6_get_rectangles(sna, &tmp, n); + if (n_this_time == 0) { + gen6_emit_copy_state(sna, &tmp); + n_this_time = gen6_get_rectangles(sna, &tmp, n); + } + n -= n_this_time; + + v = sna->render.vertex_data + sna->render.vertex_used; + sna->render.vertex_used += 9 * n_this_time; + do { + + DBG((" (%d, %d) -> (%d, %d) + (%d, %d)\n", + box->x1 + src_dx, box->y1 + src_dy, + box->x1 
+ dst_dx, box->y1 + dst_dy, + box->x2 - box->x1, box->y2 - box->y1)); + v[0] = pack_2s(box->x2 + dst_dx, box->y2 + dst_dy); + v[3] = pack_2s(box->x1 + dst_dx, box->y2 + dst_dy); + v[6] = pack_2s(box->x1 + dst_dx, box->y1 + dst_dy); + + v[1] = (box->x2 + src_dx) * tmp.src.scale[0]; + v[7] = v[4] = (box->x1 + src_dx) * tmp.src.scale[0]; + + v[5] = v[2] = (box->y2 + src_dy) * tmp.src.scale[1]; + v[8] = (box->y1 + src_dy) * tmp.src.scale[1]; + + v += 9; + box++; + } while (--n_this_time); + } while (n); + + gen6_vertex_flush(sna); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + return TRUE; +} + +static void +gen6_render_copy_blt(struct sna *sna, + const struct sna_copy_op *op, + int16_t sx, int16_t sy, + int16_t w, int16_t h, + int16_t dx, int16_t dy) +{ + if (!gen6_get_rectangles(sna, &op->base, 1)) { + gen6_emit_copy_state(sna, &op->base); + gen6_get_rectangles(sna, &op->base, 1); + } + + OUT_VERTEX(dx+w, dy+h); + OUT_VERTEX_F((sx+w)*op->base.src.scale[0]); + OUT_VERTEX_F((sy+h)*op->base.src.scale[1]); + + OUT_VERTEX(dx, dy+h); + OUT_VERTEX_F(sx*op->base.src.scale[0]); + OUT_VERTEX_F((sy+h)*op->base.src.scale[1]); + + OUT_VERTEX(dx, dy); + OUT_VERTEX_F(sx*op->base.src.scale[0]); + OUT_VERTEX_F(sy*op->base.src.scale[1]); +} + +static void +gen6_render_copy_done(struct sna *sna, const struct sna_copy_op *op) +{ + gen6_vertex_flush(sna); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); +} + +static Bool +gen6_render_copy(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + struct sna_copy_op *op) +{ +#if NO_COPY + return sna_blt_copy(sna, alu, + src_bo, dst_bo, + dst->drawable.bitsPerPixel, + op); +#endif + + DBG(("%s (alu=%d, src=(%dx%d), dst=(%dx%d))\n", + __FUNCTION__, alu, + src->drawable.width, src->drawable.height, + dst->drawable.width, dst->drawable.height)); + + if (sna->kgem.mode == KGEM_BLT && + sna_blt_copy(sna, alu, + src_bo, dst_bo, + dst->drawable.bitsPerPixel, + op)) + return TRUE; + + if (!(alu == 
GXcopy || alu == GXclear) || src_bo == dst_bo || + src->drawable.width > 8192 || src->drawable.height > 8192 || + dst->drawable.width > 8192 || dst->drawable.height > 8192) + return sna_blt_copy(sna, alu, src_bo, dst_bo, + dst->drawable.bitsPerPixel, + op); + + op->base.op = alu == GXcopy ? PictOpSrc : PictOpClear; + + op->base.dst.pixmap = dst; + op->base.dst.width = dst->drawable.width; + op->base.dst.height = dst->drawable.height; + op->base.dst.format = sna_format_for_depth(dst->drawable.depth); + op->base.dst.bo = dst_bo; + + op->base.src.bo = src_bo; + op->base.src.card_format = + gen6_get_card_format_for_depth(src->drawable.depth), + op->base.src.width = src->drawable.width; + op->base.src.height = src->drawable.height; + op->base.src.scale[0] = 1./src->drawable.width; + op->base.src.scale[1] = 1./src->drawable.height; + op->base.src.filter = SAMPLER_FILTER_NEAREST; + op->base.src.repeat = SAMPLER_EXTEND_NONE; + + op->base.is_affine = true; + op->base.floats_per_vertex = 3; + + op->base.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK; + op->base.u.gen6.nr_surfaces = 2; + op->base.u.gen6.nr_inputs = 1; + op->base.u.gen6.ve_id = 1; + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + if (!kgem_check_bo(&sna->kgem, src_bo)) + kgem_submit(&sna->kgem); + + if (kgem_bo_is_dirty(src_bo)) + kgem_emit_flush(&sna->kgem); + + gen6_emit_copy_state(sna, &op->base); + gen6_align_vertex(sna, &op->base); + + op->blt = gen6_render_copy_blt; + op->done = gen6_render_copy_done; + return TRUE; +} + +static void +gen6_emit_fill_state(struct sna *sna, const struct sna_composite_op *op) +{ + uint32_t *binding_table; + uint16_t offset; + + gen6_get_batch(sna); + + binding_table = gen6_composite_get_binding_table(sna, op, &offset); + + binding_table[0] = + gen6_bind_bo(sna, + op->dst.bo, op->dst.width, op->dst.height, + gen6_get_dest_format(op->dst.format), + TRUE); + binding_table[1] = + gen6_bind_bo(sna, + op->src.bo, 1, 1, + GEN6_SURFACEFORMAT_B8G8R8A8_UNORM, + 
FALSE); + + if (sna->kgem.surface == offset && + *(uint64_t *)(sna->kgem.batch + sna->render_state.gen6.surface_table) == *(uint64_t*)binding_table) { + sna->kgem.surface += + sizeof(struct gen6_surface_state_padded)/sizeof(uint32_t); + offset = sna->render_state.gen6.surface_table; + } + + gen6_emit_state(sna, op, offset); +} + +static Bool +gen6_render_fill_boxes(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n) +{ + struct sna_composite_op tmp; + uint32_t pixel; + + DBG(("%s (op=%d, color=(%04x, %04x, %04x, %04x) [%08x])\n", + __FUNCTION__, op, + color->red, color->green, color->blue, color->alpha, (int)format)); + + if (op >= ARRAY_SIZE(gen6_blend_op)) { + DBG(("%s: fallback due to unhandled blend op: %d\n", + __FUNCTION__, op)); + return FALSE; + } + + if (sna->kgem.mode == KGEM_BLT || + dst->drawable.width > 8192 || + dst->drawable.height > 8192 || + !gen6_check_dst_format(format)) { + uint8_t alu = GXcopy; + + if (op == PictOpClear) { + alu = GXclear; + pixel = 0; + op = PictOpSrc; + } + + if (op == PictOpOver && color->alpha >= 0xff00) + op = PictOpSrc; + + if (op == PictOpSrc && + sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + format) && + sna_blt_fill_boxes(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + pixel, box, n)) + return TRUE; + + if (dst->drawable.width > 8192 || + dst->drawable.height > 8192 || + !gen6_check_dst_format(format)) + return FALSE; + } + +#if NO_FILL_BOXES + return FALSE; +#endif + + if (!sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + PICT_a8r8g8b8)) + return FALSE; + + DBG(("%s(%08x x %d [(%d, %d), (%d, %d) ...])\n", + __FUNCTION__, pixel, n, + box[0].x1, box[0].y1, box[0].x2, box[0].y2)); + + memset(&tmp, 0, sizeof(tmp)); + + tmp.op = op; + + tmp.dst.pixmap = dst; + tmp.dst.width = dst->drawable.width; + tmp.dst.height = 
dst->drawable.height; + tmp.dst.format = format; + tmp.dst.bo = dst_bo; + + tmp.src.bo = sna_render_get_solid(sna, pixel); + tmp.src.filter = SAMPLER_FILTER_NEAREST; + tmp.src.repeat = SAMPLER_EXTEND_REPEAT; + + tmp.is_affine = TRUE; + tmp.floats_per_vertex = 3; + + tmp.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK; + tmp.u.gen6.nr_surfaces = 2; + tmp.u.gen6.nr_inputs = 1; + tmp.u.gen6.ve_id = 1; + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + + gen6_emit_fill_state(sna, &tmp); + gen6_align_vertex(sna, &tmp); + + do { + int n_this_time = gen6_get_rectangles(sna, &tmp, n); + if (n_this_time == 0) { + gen6_emit_fill_state(sna, &tmp); + n_this_time = gen6_get_rectangles(sna, &tmp, n); + } + n -= n_this_time; + do { + DBG((" (%d, %d), (%d, %d)\n", + box->x1, box->y1, box->x2, box->y2)); + OUT_VERTEX(box->x2, box->y2); + OUT_VERTEX_F(1); + OUT_VERTEX_F(1); + + OUT_VERTEX(box->x1, box->y2); + OUT_VERTEX_F(0); + OUT_VERTEX_F(1); + + OUT_VERTEX(box->x1, box->y1); + OUT_VERTEX_F(0); + OUT_VERTEX_F(0); + + box++; + } while (--n_this_time); + } while (n); + + gen6_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, tmp.src.bo); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); + return TRUE; +} + +static void +gen6_render_fill_blt(struct sna *sna, + const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h) +{ + DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); + + if (!gen6_get_rectangles(sna, &op->base, 1)) { + gen6_emit_fill_state(sna, &op->base); + gen6_get_rectangles(sna, &op->base, 1); + } + + OUT_VERTEX(x+w, y+h); + OUT_VERTEX_F(1); + OUT_VERTEX_F(1); + + OUT_VERTEX(x, y+h); + OUT_VERTEX_F(0); + OUT_VERTEX_F(1); + + OUT_VERTEX(x, y); + OUT_VERTEX_F(0); + OUT_VERTEX_F(0); +} + +static void +gen6_render_fill_done(struct sna *sna, const struct sna_fill_op *op) +{ + gen6_vertex_flush(sna); + kgem_bo_destroy(&sna->kgem, op->base.src.bo); + _kgem_set_mode(&sna->kgem, KGEM_RENDER); +} + +static Bool +gen6_render_fill(struct sna *sna, 
uint8_t alu, + PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + struct sna_fill_op *op) +{ + DBG(("%s: (alu=%d, color=%x)\n", __FUNCTION__, alu, color)); + +#if NO_FILL + return sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + op); +#endif + + if (sna->kgem.mode == KGEM_BLT && + sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + op)) + return TRUE; + + if (!(alu == GXcopy || alu == GXclear) || + dst->drawable.width > 8192 || dst->drawable.height > 8192) + return sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + op); + + if (alu == GXclear) + color = 0; + + op->base.op = color == 0 ? PictOpClear : PictOpSrc; + + op->base.dst.pixmap = dst; + op->base.dst.width = dst->drawable.width; + op->base.dst.height = dst->drawable.height; + op->base.dst.format = sna_format_for_depth(dst->drawable.depth); + op->base.dst.bo = dst_bo; + + op->base.src.bo = + sna_render_get_solid(sna, + sna_rgba_for_color(color, + dst->drawable.depth)); + op->base.src.filter = SAMPLER_FILTER_NEAREST; + op->base.src.repeat = SAMPLER_EXTEND_REPEAT; + + op->base.is_affine = TRUE; + op->base.floats_per_vertex = 3; + + op->base.u.gen6.wm_kernel = GEN6_WM_KERNEL_NOMASK; + op->base.u.gen6.nr_surfaces = 2; + op->base.u.gen6.nr_inputs = 1; + op->base.u.gen6.ve_id = 1; + + if (!kgem_check_bo(&sna->kgem, dst_bo)) + kgem_submit(&sna->kgem); + + gen6_emit_fill_state(sna, &op->base); + gen6_align_vertex(sna, &op->base); + + op->blt = gen6_render_fill_blt; + op->done = gen6_render_fill_done; + return TRUE; +} + +static void gen6_render_flush(struct sna *sna) +{ + gen6_vertex_finish(sna, TRUE); +} + +static void +gen6_render_context_switch(struct sna *sna, + int new_mode) +{ + if (!new_mode) + return; + + if (sna->kgem.mode) + _kgem_submit(&sna->kgem); + + sna->kgem.ring = new_mode; +} + +static void gen6_render_reset(struct sna *sna) +{ + sna->render_state.gen6.needs_invariant = TRUE; + sna->render_state.gen6.vb_id = 0; + 
sna->render_state.gen6.ve_id = -1; + sna->render_state.gen6.last_primitive = -1; + + sna->render_state.gen6.num_sf_outputs = 0; + sna->render_state.gen6.samplers = -1; + sna->render_state.gen6.blend = -1; + sna->render_state.gen6.kernel = -1; + sna->render_state.gen6.drawrect_offset = -1; + sna->render_state.gen6.drawrect_limit = -1; + sna->render_state.gen6.surface_table = -1; +} + +static void gen6_render_fini(struct sna *sna) +{ + kgem_bo_destroy(&sna->kgem, sna->render_state.gen6.general_bo); +} + +static Bool gen6_render_setup(struct sna *sna) +{ + struct gen6_render_state *state = &sna->render_state.gen6; + struct sna_static_stream general; + struct gen6_sampler_state *ss; + int i, j, k, l, m; + + sna_static_stream_init(&general); + + /* Zero pad the start. If you see an offset of 0x0 in the batchbuffer + * dumps, you know it points to zero. + */ + null_create(&general); + + for (m = 0; m < GEN6_KERNEL_COUNT; m++) + state->wm_kernel[m] = + sna_static_stream_add(&general, + wm_kernels[m].data, + wm_kernels[m].size, + 64); + + ss = sna_static_stream_map(&general, + 2 * sizeof(*ss) * + FILTER_COUNT * EXTEND_COUNT * + FILTER_COUNT * EXTEND_COUNT, + 32); + state->wm_state = sna_static_stream_offsetof(&general, ss); + for (i = 0; i < FILTER_COUNT; i++) { + for (j = 0; j < EXTEND_COUNT; j++) { + for (k = 0; k < FILTER_COUNT; k++) { + for (l = 0; l < EXTEND_COUNT; l++) { + sampler_state_init(ss++, i, j); + sampler_state_init(ss++, k, l); + } + } + } + } + + state->cc_vp = gen6_create_cc_viewport(&general); + state->cc_blend = gen6_composite_create_blend_state(&general); + + state->general_bo = sna_static_stream_fini(sna, &general); + return state->general_bo != NULL; +} + +Bool gen6_render_init(struct sna *sna) +{ + if (!gen6_render_setup(sna)) + return FALSE; + + gen6_render_reset(sna); + + sna->render.composite = gen6_render_composite; + sna->render.video = gen6_render_video; + + sna->render.copy_boxes = gen6_render_copy_boxes; + sna->render.copy = 
gen6_render_copy; + + sna->render.fill_boxes = gen6_render_fill_boxes; + sna->render.fill = gen6_render_fill; + + sna->render.flush = gen6_render_flush; + sna->render.context_switch = gen6_render_context_switch; + sna->render.reset = gen6_render_reset; + sna->render.fini = gen6_render_fini; + + sna->render.max_3d_size = 8192; + return TRUE; +} diff --git a/src/sna/gen6_render.h b/src/sna/gen6_render.h new file mode 100644 index 00000000..42c5a6b9 --- /dev/null +++ b/src/sna/gen6_render.h @@ -0,0 +1,1598 @@ +#ifndef GEN6_RENDER_H +#define GEN6_RENDER_H + +#define GEN6_MASK(high, low) (((1 << ((high) - (low) + 1)) - 1) << (low)) + +#define GEN6_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \ + ((Pipeline) << 27) | \ + ((Opcode) << 24) | \ + ((Subopcode) << 16)) + +#define GEN6_STATE_BASE_ADDRESS GEN6_3D(0, 1, 1) +#define GEN6_STATE_SIP GEN6_3D(0, 1, 2) + +#define GEN6_PIPELINE_SELECT GEN6_3D(1, 1, 4) + +#define GEN6_MEDIA_STATE_POINTERS GEN6_3D(2, 0, 0) +#define GEN6_MEDIA_OBJECT GEN6_3D(2, 1, 0) + +#define GEN6_3DSTATE_BINDING_TABLE_POINTERS GEN6_3D(3, 0, 1) +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */ +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */ +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */ + +#define GEN6_3DSTATE_VERTEX_BUFFERS GEN6_3D(3, 0, 8) +#define GEN6_3DSTATE_VERTEX_ELEMENTS GEN6_3D(3, 0, 9) +#define GEN6_3DSTATE_INDEX_BUFFER GEN6_3D(3, 0, 0xa) +#define GEN6_3DSTATE_VF_STATISTICS GEN6_3D(3, 0, 0xb) + +#define GEN6_3DSTATE_DRAWING_RECTANGLE GEN6_3D(3, 1, 0) +#define GEN6_3DSTATE_CONSTANT_COLOR GEN6_3D(3, 1, 1) +#define GEN6_3DSTATE_SAMPLER_PALETTE_LOAD GEN6_3D(3, 1, 2) +#define GEN6_3DSTATE_CHROMA_KEY GEN6_3D(3, 1, 4) +#define GEN6_3DSTATE_DEPTH_BUFFER GEN6_3D(3, 1, 5) +# define GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29 +# define GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18 + +#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET GEN6_3D(3, 1, 6) +#define GEN6_3DSTATE_POLY_STIPPLE_PATTERN GEN6_3D(3, 1, 
7) +#define GEN6_3DSTATE_LINE_STIPPLE GEN6_3D(3, 1, 8) +#define GEN6_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN6_3D(3, 1, 9) +/* These two are BLC and CTG only, not BW or CL */ +#define GEN6_3DSTATE_AA_LINE_PARAMS GEN6_3D(3, 1, 0xa) +#define GEN6_3DSTATE_GS_SVB_INDEX GEN6_3D(3, 1, 0xb) + +#define GEN6_3DPRIMITIVE GEN6_3D(3, 3, 0) + +#define GEN6_3DSTATE_CLEAR_PARAMS GEN6_3D(3, 1, 0x10) +/* DW1 */ +# define GEN6_3DSTATE_DEPTH_CLEAR_VALID (1 << 15) + +#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN6_3D(3, 0, 0x02) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8) + +#define GEN6_3DSTATE_URB GEN6_3D(3, 0, 0x05) +/* DW1 */ +# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16 +# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0 +/* DW2 */ +# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8 +# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0 + +#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN6_3D(3, 0, 0x0d) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10) + +#define GEN6_3DSTATE_CC_STATE_POINTERS GEN6_3D(3, 0, 0x0e) + +#define GEN6_3DSTATE_VS GEN6_3D(3, 0, 0x10) + +#define GEN6_3DSTATE_GS GEN6_3D(3, 0, 0x11) +/* DW4 */ +# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0 + +#define GEN6_3DSTATE_CLIP GEN6_3D(3, 0, 0x12) + +#define GEN6_3DSTATE_SF GEN6_3D(3, 0, 0x13) +/* DW1 */ +# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +/* DW3 */ +# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29) +# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29) +# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29) +# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29) +/* DW4 */ +# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27 +# define 
GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25 + +#define GEN6_3DSTATE_WM GEN6_3D(3, 0, 0x14) +/* DW2 */ +# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHIFT 27 +# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16 +/* DW5 */ +# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25 +# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) + + +#define GEN6_3DSTATE_CONSTANT_VS GEN6_3D(3, 0, 0x15) +#define GEN6_3DSTATE_CONSTANT_GS GEN6_3D(3, 0, 0x16) +#define GEN6_3DSTATE_CONSTANT_PS GEN6_3D(3, 0, 0x17) + +#define GEN6_3DSTATE_SAMPLE_MASK GEN6_3D(3, 0, 0x18) + +#define GEN6_3DSTATE_MULTISAMPLE GEN6_3D(3, 1, 0x0d) +/* DW1 */ +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1) + +#define PIPELINE_SELECT_3D 0 +#define PIPELINE_SELECT_MEDIA 1 + +/* for GEN6_STATE_BASE_ADDRESS */ +#define BASE_ADDRESS_MODIFY (1 << 0) + +/* for GEN6_PIPE_CONTROL */ +#define GEN6_PIPE_CONTROL_NOWRITE (0 << 14) +#define GEN6_PIPE_CONTROL_WRITE_QWORD (1 << 14) +#define GEN6_PIPE_CONTROL_WRITE_DEPTH (2 << 14) +#define GEN6_PIPE_CONTROL_WRITE_TIME (3 << 14) +#define GEN6_PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define 
GEN6_PIPE_CONTROL_WC_FLUSH (1 << 12) +#define GEN6_PIPE_CONTROL_IS_FLUSH (1 << 11) +#define GEN6_PIPE_CONTROL_TC_FLUSH (1 << 10) +#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define GEN6_PIPE_CONTROL_GLOBAL_GTT (1 << 2) +#define GEN6_PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) + +/* VERTEX_BUFFER_STATE Structure */ +#define VB0_BUFFER_INDEX_SHIFT 26 +#define VB0_VERTEXDATA (0 << 20) +#define VB0_INSTANCEDATA (1 << 20) +#define VB0_BUFFER_PITCH_SHIFT 0 + +/* VERTEX_ELEMENT_STATE Structure */ +#define VE0_VERTEX_BUFFER_INDEX_SHIFT 26 /* for GEN6 */ +#define VE0_VALID (1 << 25) /* for GEN6 */ +#define VE0_FORMAT_SHIFT 16 +#define VE0_OFFSET_SHIFT 0 +#define VE1_VFCOMPONENT_0_SHIFT 28 +#define VE1_VFCOMPONENT_1_SHIFT 24 +#define VE1_VFCOMPONENT_2_SHIFT 20 +#define VE1_VFCOMPONENT_3_SHIFT 16 +#define VE1_DESTINATION_ELEMENT_OFFSET_SHIFT 0 + +/* 3DPRIMITIVE bits */ +#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15) +#define GEN6_3DPRIMITIVE_VERTEX_RANDOM (1 << 15) +/* Primitive types are in gen6_defines.h */ +#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT 10 + +#define GEN6_SVG_CTL 0x7400 + +#define GEN6_SVG_CTL_GS_BA (0 << 8) +#define GEN6_SVG_CTL_SS_BA (1 << 8) +#define GEN6_SVG_CTL_IO_BA (2 << 8) +#define GEN6_SVG_CTL_GS_AUB (3 << 8) +#define GEN6_SVG_CTL_IO_AUB (4 << 8) +#define GEN6_SVG_CTL_SIP (5 << 8) + +#define GEN6_SVG_RDATA 0x7404 +#define GEN6_SVG_WORK_CTL 0x7408 + +#define GEN6_VF_CTL 0x7500 + +#define GEN6_VF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8) +#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8) +#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4) +#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4) +#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3) +#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2) +#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1) +#define GEN6_VF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_VF_STRG_VAL 
0x7504 +#define GEN6_VF_STR_VL_OVR 0x7508 +#define GEN6_VF_VC_OVR 0x750c +#define GEN6_VF_STR_PSKIP 0x7510 +#define GEN6_VF_MAX_PRIM 0x7514 +#define GEN6_VF_RDATA 0x7518 + +#define GEN6_VS_CTL 0x7600 +#define GEN6_VS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8) +#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN6_VS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_VS_STRG_VAL 0x7604 +#define GEN6_VS_RDATA 0x7608 + +#define GEN6_SF_CTL 0x7b00 +#define GEN6_SF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8) +#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4) +#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3) +#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN6_SF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_SF_STRG_VAL 0x7b04 +#define GEN6_SF_RDATA 0x7b18 + +#define GEN6_WIZ_CTL 0x7c00 +#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16 +#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8) +#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8) +#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8) +#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6) +#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS 
(1 << 5) +#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4) +#define GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3) +#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_WIZ_STRG_VAL 0x7c04 +#define GEN6_WIZ_RDATA 0x7c18 + +#define GEN6_TS_CTL 0x7e00 +#define GEN6_TS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8) +#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8) +#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2) +#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1) +#define GEN6_TS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_TS_STRG_VAL 0x7e04 +#define GEN6_TS_RDATA 0x7e08 + +#define GEN6_TD_CTL 0x8000 +#define GEN6_TD_CTL_MUX_SHIFT 8 +#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7) +#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6) +#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5) +#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4) +#define GEN6_TD_CTL_BREAKPOINT_ENABLE (1 << 2) +#define GEN6_TD_CTL2 0x8004 +#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28) +#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26) +#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25) +#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16 +#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8) +#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7) +#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6) +#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5) +#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4) +#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3) +#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0) +#define GEN6_TD_VF_VS_EMSK 0x8008 +#define GEN6_TD_GS_EMSK 0x800c +#define GEN6_TD_CLIP_EMSK 0x8010 +#define GEN6_TD_SF_EMSK 0x8014 +#define GEN6_TD_WIZ_EMSK 0x8018 +#define GEN6_TD_0_6_EHTRG_VAL 0x801c +#define 
GEN6_TD_0_7_EHTRG_VAL 0x8020 +#define GEN6_TD_0_6_EHTRG_MSK 0x8024 +#define GEN6_TD_0_7_EHTRG_MSK 0x8028 +#define GEN6_TD_RDATA 0x802c +#define GEN6_TD_TS_EMSK 0x8030 + +#define GEN6_EU_CTL 0x8800 +#define GEN6_EU_CTL_SELECT_SHIFT 16 +#define GEN6_EU_CTL_DATA_MUX_SHIFT 8 +#define GEN6_EU_ATT_0 0x8810 +#define GEN6_EU_ATT_1 0x8814 +#define GEN6_EU_ATT_DATA_0 0x8820 +#define GEN6_EU_ATT_DATA_1 0x8824 +#define GEN6_EU_ATT_CLR_0 0x8830 +#define GEN6_EU_ATT_CLR_1 0x8834 +#define GEN6_EU_RDATA 0x8840 + +#define GEN6_3D(Pipeline,Opcode,Subopcode) ((3 << 29) | \ + ((Pipeline) << 27) | \ + ((Opcode) << 24) | \ + ((Subopcode) << 16)) + +#define GEN6_STATE_BASE_ADDRESS GEN6_3D(0, 1, 1) +#define GEN6_STATE_SIP GEN6_3D(0, 1, 2) + +#define GEN6_PIPELINE_SELECT GEN6_3D(1, 1, 4) + +#define GEN6_MEDIA_STATE_POINTERS GEN6_3D(2, 0, 0) +#define GEN6_MEDIA_OBJECT GEN6_3D(2, 1, 0) + +#define GEN6_3DSTATE_BINDING_TABLE_POINTERS GEN6_3D(3, 0, 1) +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_PS (1 << 12)/* for GEN6 */ +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_GS (1 << 9) /* for GEN6 */ +# define GEN6_3DSTATE_BINDING_TABLE_MODIFY_VS (1 << 8) /* for GEN6 */ + +#define GEN6_3DSTATE_VERTEX_BUFFERS GEN6_3D(3, 0, 8) +#define GEN6_3DSTATE_VERTEX_ELEMENTS GEN6_3D(3, 0, 9) +#define GEN6_3DSTATE_INDEX_BUFFER GEN6_3D(3, 0, 0xa) +#define GEN6_3DSTATE_VF_STATISTICS GEN6_3D(3, 0, 0xb) + +#define GEN6_3DSTATE_DRAWING_RECTANGLE GEN6_3D(3, 1, 0) +#define GEN6_3DSTATE_CONSTANT_COLOR GEN6_3D(3, 1, 1) +#define GEN6_3DSTATE_SAMPLER_PALETTE_LOAD GEN6_3D(3, 1, 2) +#define GEN6_3DSTATE_CHROMA_KEY GEN6_3D(3, 1, 4) +#define GEN6_3DSTATE_DEPTH_BUFFER GEN6_3D(3, 1, 5) +# define GEN6_3DSTATE_DEPTH_BUFFER_TYPE_SHIFT 29 +# define GEN6_3DSTATE_DEPTH_BUFFER_FORMAT_SHIFT 18 + +#define GEN6_3DSTATE_POLY_STIPPLE_OFFSET GEN6_3D(3, 1, 6) +#define GEN6_3DSTATE_POLY_STIPPLE_PATTERN GEN6_3D(3, 1, 7) +#define GEN6_3DSTATE_LINE_STIPPLE GEN6_3D(3, 1, 8) +#define GEN6_3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP GEN6_3D(3, 1, 9) +/* These two 
are BLC and CTG only, not BW or CL */ +#define GEN6_3DSTATE_AA_LINE_PARAMS GEN6_3D(3, 1, 0xa) +#define GEN6_3DSTATE_GS_SVB_INDEX GEN6_3D(3, 1, 0xb) + +#define GEN6_PIPE_CONTROL GEN6_3D(3, 2, 0) + +#define GEN6_3DPRIMITIVE GEN6_3D(3, 3, 0) + +#define GEN6_3DSTATE_CLEAR_PARAMS GEN6_3D(3, 1, 0x10) +/* DW1 */ +# define GEN6_3DSTATE_DEPTH_CLEAR_VALID (1 << 15) + +/* for GEN6+ */ +#define GEN6_3DSTATE_SAMPLER_STATE_POINTERS GEN6_3D(3, 0, 0x02) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_PS (1 << 12) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_GS (1 << 9) +# define GEN6_3DSTATE_SAMPLER_STATE_MODIFY_VS (1 << 8) + +#define GEN6_3DSTATE_URB GEN6_3D(3, 0, 0x05) +/* DW1 */ +# define GEN6_3DSTATE_URB_VS_SIZE_SHIFT 16 +# define GEN6_3DSTATE_URB_VS_ENTRIES_SHIFT 0 +/* DW2 */ +# define GEN6_3DSTATE_URB_GS_ENTRIES_SHIFT 8 +# define GEN6_3DSTATE_URB_GS_SIZE_SHIFT 0 + +#define GEN6_3DSTATE_VIEWPORT_STATE_POINTERS GEN6_3D(3, 0, 0x0d) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CC (1 << 12) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_SF (1 << 11) +# define GEN6_3DSTATE_VIEWPORT_STATE_MODIFY_CLIP (1 << 10) + +#define GEN6_3DSTATE_CC_STATE_POINTERS GEN6_3D(3, 0, 0x0e) + +#define GEN6_3DSTATE_VS GEN6_3D(3, 0, 0x10) + +#define GEN6_3DSTATE_GS GEN6_3D(3, 0, 0x11) +/* DW4 */ +# define GEN6_3DSTATE_GS_DISPATCH_START_GRF_SHIFT 0 + +#define GEN6_3DSTATE_CLIP GEN6_3D(3, 0, 0x12) + +#define GEN6_3DSTATE_SF GEN6_3D(3, 0, 0x13) +/* DW1 */ +# define GEN6_3DSTATE_SF_NUM_OUTPUTS_SHIFT 22 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_LENGTH_SHIFT 11 +# define GEN6_3DSTATE_SF_URB_ENTRY_READ_OFFSET_SHIFT 4 +/* DW2 */ +/* DW3 */ +# define GEN6_3DSTATE_SF_CULL_BOTH (0 << 29) +# define GEN6_3DSTATE_SF_CULL_NONE (1 << 29) +# define GEN6_3DSTATE_SF_CULL_FRONT (2 << 29) +# define GEN6_3DSTATE_SF_CULL_BACK (3 << 29) +/* DW4 */ +# define GEN6_3DSTATE_SF_TRI_PROVOKE_SHIFT 29 +# define GEN6_3DSTATE_SF_LINE_PROVOKE_SHIFT 27 +# define GEN6_3DSTATE_SF_TRIFAN_PROVOKE_SHIFT 25 + + +#define GEN6_3DSTATE_WM GEN6_3D(3, 0, 
0x14) +/* DW2 */ +# define GEN6_3DSTATE_WM_SAMPLER_COUNT_SHITF 27 +# define GEN6_3DSTATE_WM_BINDING_TABLE_ENTRY_COUNT_SHIFT 18 +/* DW4 */ +# define GEN6_3DSTATE_WM_DISPATCH_START_GRF_0_SHIFT 16 +/* DW5 */ +# define GEN6_3DSTATE_WM_MAX_THREADS_SHIFT 25 +# define GEN6_3DSTATE_WM_DISPATCH_ENABLE (1 << 19) +# define GEN6_3DSTATE_WM_16_DISPATCH_ENABLE (1 << 1) +# define GEN6_3DSTATE_WM_8_DISPATCH_ENABLE (1 << 0) +/* DW6 */ +# define GEN6_3DSTATE_WM_NUM_SF_OUTPUTS_SHIFT 20 +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 15) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_CENTROID_BARYCENTRIC (1 << 14) +# define GEN6_3DSTATE_WM_NONPERSPECTIVE_PIXEL_BARYCENTRIC (1 << 13) +# define GEN6_3DSTATE_WM_PERSPECTIVE_SAMPLE_BARYCENTRIC (1 << 12) +# define GEN6_3DSTATE_WM_PERSPECTIVE_CENTROID_BARYCENTRIC (1 << 11) +# define GEN6_3DSTATE_WM_PERSPECTIVE_PIXEL_BARYCENTRIC (1 << 10) + + +#define GEN6_3DSTATE_CONSTANT_VS GEN6_3D(3, 0, 0x15) +#define GEN6_3DSTATE_CONSTANT_GS GEN6_3D(3, 0, 0x16) +#define GEN6_3DSTATE_CONSTANT_PS GEN6_3D(3, 0, 0x17) + +#define GEN6_3DSTATE_SAMPLE_MASK GEN6_3D(3, 0, 0x18) + +#define GEN6_3DSTATE_MULTISAMPLE GEN6_3D(3, 1, 0x0d) +/* DW1 */ +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_CENTER (0 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_PIXEL_LOCATION_UPPER_LEFT (1 << 4) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_1 (0 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_4 (2 << 1) +# define GEN6_3DSTATE_MULTISAMPLE_NUMSAMPLES_8 (3 << 1) + +#define PIPELINE_SELECT_3D 0 +#define PIPELINE_SELECT_MEDIA 1 + +#define UF0_CS_REALLOC (1 << 13) +#define UF0_VFE_REALLOC (1 << 12) +#define UF0_SF_REALLOC (1 << 11) +#define UF0_CLIP_REALLOC (1 << 10) +#define UF0_GS_REALLOC (1 << 9) +#define UF0_VS_REALLOC (1 << 8) +#define UF1_CLIP_FENCE_SHIFT 20 +#define UF1_GS_FENCE_SHIFT 10 +#define UF1_VS_FENCE_SHIFT 0 +#define UF2_CS_FENCE_SHIFT 20 +#define UF2_VFE_FENCE_SHIFT 10 +#define UF2_SF_FENCE_SHIFT 0 + +/* for GEN6_STATE_BASE_ADDRESS */ +#define 
BASE_ADDRESS_MODIFY (1 << 0) + +/* for GEN6_3DSTATE_PIPELINED_POINTERS */ +#define GEN6_GS_DISABLE 0 +#define GEN6_GS_ENABLE 1 +#define GEN6_CLIP_DISABLE 0 +#define GEN6_CLIP_ENABLE 1 + +/* for GEN6_PIPE_CONTROL */ +#define GEN6_PIPE_CONTROL_NOWRITE (0 << 14) +#define GEN6_PIPE_CONTROL_WRITE_QWORD (1 << 14) +#define GEN6_PIPE_CONTROL_WRITE_DEPTH (2 << 14) +#define GEN6_PIPE_CONTROL_WRITE_TIME (3 << 14) +#define GEN6_PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define GEN6_PIPE_CONTROL_WC_FLUSH (1 << 12) +#define GEN6_PIPE_CONTROL_IS_FLUSH (1 << 11) +#define GEN6_PIPE_CONTROL_TC_FLUSH (1 << 10) +#define GEN6_PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define GEN6_PIPE_CONTROL_GLOBAL_GTT (1 << 2) +#define GEN6_PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define GEN6_PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) + +/* 3DPRIMITIVE bits */ +#define GEN6_3DPRIMITIVE_VERTEX_SEQUENTIAL (0 << 15) +#define GEN6_3DPRIMITIVE_VERTEX_RANDOM (1 << 15) +/* Primitive types are in gen6_defines.h */ +#define GEN6_3DPRIMITIVE_TOPOLOGY_SHIFT 10 + +#define GEN6_SVG_CTL 0x7400 + +#define GEN6_SVG_CTL_GS_BA (0 << 8) +#define GEN6_SVG_CTL_SS_BA (1 << 8) +#define GEN6_SVG_CTL_IO_BA (2 << 8) +#define GEN6_SVG_CTL_GS_AUB (3 << 8) +#define GEN6_SVG_CTL_IO_AUB (4 << 8) +#define GEN6_SVG_CTL_SIP (5 << 8) + +#define GEN6_SVG_RDATA 0x7404 +#define GEN6_SVG_WORK_CTL 0x7408 + +#define GEN6_VF_CTL 0x7500 + +#define GEN6_VF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_THREADID (0 << 8) +#define GEN6_VF_CTL_SNAPSHOT_MUX_SELECT_VF_DEBUG (1 << 8) +#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_SEQUENCE (0 << 4) +#define GEN6_VF_CTL_SNAPSHOT_TYPE_VERTEX_INDEX (1 << 4) +#define GEN6_VF_CTL_SKIP_INITIAL_PRIMITIVES (1 << 3) +#define GEN6_VF_CTL_MAX_PRIMITIVES_LIMIT_ENABLE (1 << 2) +#define GEN6_VF_CTL_VERTEX_RANGE_LIMIT_ENABLE (1 << 1) +#define GEN6_VF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_VF_STRG_VAL 0x7504 +#define GEN6_VF_STR_VL_OVR 0x7508 +#define GEN6_VF_VC_OVR 0x750c +#define GEN6_VF_STR_PSKIP 0x7510 
+#define GEN6_VF_MAX_PRIM 0x7514 +#define GEN6_VF_RDATA 0x7518 + +#define GEN6_VS_CTL 0x7600 +#define GEN6_VS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_0 (0 << 8) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VERTEX_1 (1 << 8) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VALID_COUNT (2 << 8) +#define GEN6_VS_CTL_SNAPSHOT_MUX_VS_KERNEL_POINTER (3 << 8) +#define GEN6_VS_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN6_VS_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN6_VS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_VS_STRG_VAL 0x7604 +#define GEN6_VS_RDATA 0x7608 + +#define GEN6_SF_CTL 0x7b00 +#define GEN6_SF_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_FF_ID (0 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_0_REL_COUNT (1 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_FF_ID (2 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_1_REL_COUNT (3 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_FF_ID (4 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_2_REL_COUNT (5 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_VERTEX_COUNT (6 << 8) +#define GEN6_SF_CTL_SNAPSHOT_MUX_SF_KERNEL_POINTER (7 << 8) +#define GEN6_SF_CTL_MIN_MAX_PRIMITIVE_RANGE_ENABLE (1 << 4) +#define GEN6_SF_CTL_DEBUG_CLIP_RECTANGLE_ENABLE (1 << 3) +#define GEN6_SF_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN6_SF_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN6_SF_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_SF_STRG_VAL 0x7b04 +#define GEN6_SF_RDATA 0x7b18 + +#define GEN6_WIZ_CTL 0x7c00 +#define GEN6_WIZ_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_WIZ_CTL_SUBSPAN_INSTANCE_SHIFT 16 +#define GEN6_WIZ_CTL_SNAPSHOT_MUX_WIZ_KERNEL_POINTER (0 << 8) +#define GEN6_WIZ_CTL_SNAPSHOT_MUX_SUBSPAN_INSTANCE (1 << 8) +#define GEN6_WIZ_CTL_SNAPSHOT_MUX_PRIMITIVE_SEQUENCE (2 << 8) +#define GEN6_WIZ_CTL_SINGLE_SUBSPAN_DISPATCH (1 << 6) +#define GEN6_WIZ_CTL_IGNORE_COLOR_SCOREBOARD_STALLS (1 << 5) +#define GEN6_WIZ_CTL_ENABLE_SUBSPAN_INSTANCE_COMPARE (1 << 4) +#define 
GEN6_WIZ_CTL_USE_UPSTREAM_SNAPSHOT_FLAG (1 << 3) +#define GEN6_WIZ_CTL_SNAPSHOT_ALL_THREADS (1 << 2) +#define GEN6_WIZ_CTL_THREAD_SNAPSHOT_ENABLE (1 << 1) +#define GEN6_WIZ_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_WIZ_STRG_VAL 0x7c04 +#define GEN6_WIZ_RDATA 0x7c18 + +#define GEN6_TS_CTL 0x7e00 +#define GEN6_TS_CTL_SNAPSHOT_COMPLETE (1 << 31) +#define GEN6_TS_CTL_SNAPSHOT_MESSAGE_ERROR (0 << 8) +#define GEN6_TS_CTL_SNAPSHOT_INTERFACE_DESCRIPTOR (3 << 8) +#define GEN6_TS_CTL_SNAPSHOT_ALL_CHILD_THREADS (1 << 2) +#define GEN6_TS_CTL_SNAPSHOT_ALL_ROOT_THREADS (1 << 1) +#define GEN6_TS_CTL_SNAPSHOT_ENABLE (1 << 0) + +#define GEN6_TS_STRG_VAL 0x7e04 +#define GEN6_TS_RDATA 0x7e08 + +#define GEN6_TD_CTL 0x8000 +#define GEN6_TD_CTL_MUX_SHIFT 8 +#define GEN6_TD_CTL_EXTERNAL_HALT_R0_DEBUG_MATCH (1 << 7) +#define GEN6_TD_CTL_FORCE_EXTERNAL_HALT (1 << 6) +#define GEN6_TD_CTL_EXCEPTION_MASK_OVERRIDE (1 << 5) +#define GEN6_TD_CTL_FORCE_THREAD_BREAKPOINT_ENABLE (1 << 4) +#define GEN6_TD_CTL_BREAKPOINT_ENABLE (1 << 2) +#define GEN6_TD_CTL2 0x8004 +#define GEN6_TD_CTL2_ILLEGAL_OPCODE_EXCEPTION_OVERRIDE (1 << 28) +#define GEN6_TD_CTL2_MASKSTACK_EXCEPTION_OVERRIDE (1 << 26) +#define GEN6_TD_CTL2_SOFTWARE_EXCEPTION_OVERRIDE (1 << 25) +#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_SHIFT 16 +#define GEN6_TD_CTL2_ACTIVE_THREAD_LIMIT_ENABLE (1 << 8) +#define GEN6_TD_CTL2_THREAD_SPAWNER_EXECUTION_MASK_ENABLE (1 << 7) +#define GEN6_TD_CTL2_WIZ_EXECUTION_MASK_ENABLE (1 << 6) +#define GEN6_TD_CTL2_SF_EXECUTION_MASK_ENABLE (1 << 5) +#define GEN6_TD_CTL2_CLIPPER_EXECUTION_MASK_ENABLE (1 << 4) +#define GEN6_TD_CTL2_GS_EXECUTION_MASK_ENABLE (1 << 3) +#define GEN6_TD_CTL2_VS_EXECUTION_MASK_ENABLE (1 << 0) +#define GEN6_TD_VF_VS_EMSK 0x8008 +#define GEN6_TD_GS_EMSK 0x800c +#define GEN6_TD_CLIP_EMSK 0x8010 +#define GEN6_TD_SF_EMSK 0x8014 +#define GEN6_TD_WIZ_EMSK 0x8018 +#define GEN6_TD_0_6_EHTRG_VAL 0x801c +#define GEN6_TD_0_7_EHTRG_VAL 0x8020 +#define GEN6_TD_0_6_EHTRG_MSK 0x8024 +#define 
GEN6_TD_0_7_EHTRG_MSK 0x8028 +#define GEN6_TD_RDATA 0x802c +#define GEN6_TD_TS_EMSK 0x8030 + +#define GEN6_EU_CTL 0x8800 +#define GEN6_EU_CTL_SELECT_SHIFT 16 +#define GEN6_EU_CTL_DATA_MUX_SHIFT 8 +#define GEN6_EU_ATT_0 0x8810 +#define GEN6_EU_ATT_1 0x8814 +#define GEN6_EU_ATT_DATA_0 0x8820 +#define GEN6_EU_ATT_DATA_1 0x8824 +#define GEN6_EU_ATT_CLR_0 0x8830 +#define GEN6_EU_ATT_CLR_1 0x8834 +#define GEN6_EU_RDATA 0x8840 + +/* 3D state: + */ +#define _3DOP_3DSTATE_PIPELINED 0x0 +#define _3DOP_3DSTATE_NONPIPELINED 0x1 +#define _3DOP_3DCONTROL 0x2 +#define _3DOP_3DPRIMITIVE 0x3 + +#define _3DSTATE_PIPELINED_POINTERS 0x00 +#define _3DSTATE_BINDING_TABLE_POINTERS 0x01 +#define _3DSTATE_VERTEX_BUFFERS 0x08 +#define _3DSTATE_VERTEX_ELEMENTS 0x09 +#define _3DSTATE_INDEX_BUFFER 0x0A +#define _3DSTATE_VF_STATISTICS 0x0B +#define _3DSTATE_DRAWING_RECTANGLE 0x00 +#define _3DSTATE_CONSTANT_COLOR 0x01 +#define _3DSTATE_SAMPLER_PALETTE_LOAD 0x02 +#define _3DSTATE_CHROMA_KEY 0x04 +#define _3DSTATE_DEPTH_BUFFER 0x05 +#define _3DSTATE_POLY_STIPPLE_OFFSET 0x06 +#define _3DSTATE_POLY_STIPPLE_PATTERN 0x07 +#define _3DSTATE_LINE_STIPPLE 0x08 +#define _3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP 0x09 +#define _3DCONTROL 0x00 +#define _3DPRIMITIVE 0x00 + +#define _3DPRIM_POINTLIST 0x01 +#define _3DPRIM_LINELIST 0x02 +#define _3DPRIM_LINESTRIP 0x03 +#define _3DPRIM_TRILIST 0x04 +#define _3DPRIM_TRISTRIP 0x05 +#define _3DPRIM_TRIFAN 0x06 +#define _3DPRIM_QUADLIST 0x07 +#define _3DPRIM_QUADSTRIP 0x08 +#define _3DPRIM_LINELIST_ADJ 0x09 +#define _3DPRIM_LINESTRIP_ADJ 0x0A +#define _3DPRIM_TRILIST_ADJ 0x0B +#define _3DPRIM_TRISTRIP_ADJ 0x0C +#define _3DPRIM_TRISTRIP_REVERSE 0x0D +#define _3DPRIM_POLYGON 0x0E +#define _3DPRIM_RECTLIST 0x0F +#define _3DPRIM_LINELOOP 0x10 +#define _3DPRIM_POINTLIST_BF 0x11 +#define _3DPRIM_LINESTRIP_CONT 0x12 +#define _3DPRIM_LINESTRIP_BF 0x13 +#define _3DPRIM_LINESTRIP_CONT_BF 0x14 +#define _3DPRIM_TRIFAN_NOSTIPPLE 0x15 + +#define _3DPRIM_VERTEXBUFFER_ACCESS_SEQUENTIAL 0 
+#define _3DPRIM_VERTEXBUFFER_ACCESS_RANDOM 1 + +#define GEN6_ANISORATIO_2 0 +#define GEN6_ANISORATIO_4 1 +#define GEN6_ANISORATIO_6 2 +#define GEN6_ANISORATIO_8 3 +#define GEN6_ANISORATIO_10 4 +#define GEN6_ANISORATIO_12 5 +#define GEN6_ANISORATIO_14 6 +#define GEN6_ANISORATIO_16 7 + +#define GEN6_BLENDFACTOR_ONE 0x1 +#define GEN6_BLENDFACTOR_SRC_COLOR 0x2 +#define GEN6_BLENDFACTOR_SRC_ALPHA 0x3 +#define GEN6_BLENDFACTOR_DST_ALPHA 0x4 +#define GEN6_BLENDFACTOR_DST_COLOR 0x5 +#define GEN6_BLENDFACTOR_SRC_ALPHA_SATURATE 0x6 +#define GEN6_BLENDFACTOR_CONST_COLOR 0x7 +#define GEN6_BLENDFACTOR_CONST_ALPHA 0x8 +#define GEN6_BLENDFACTOR_SRC1_COLOR 0x9 +#define GEN6_BLENDFACTOR_SRC1_ALPHA 0x0A +#define GEN6_BLENDFACTOR_ZERO 0x11 +#define GEN6_BLENDFACTOR_INV_SRC_COLOR 0x12 +#define GEN6_BLENDFACTOR_INV_SRC_ALPHA 0x13 +#define GEN6_BLENDFACTOR_INV_DST_ALPHA 0x14 +#define GEN6_BLENDFACTOR_INV_DST_COLOR 0x15 +#define GEN6_BLENDFACTOR_INV_CONST_COLOR 0x17 +#define GEN6_BLENDFACTOR_INV_CONST_ALPHA 0x18 +#define GEN6_BLENDFACTOR_INV_SRC1_COLOR 0x19 +#define GEN6_BLENDFACTOR_INV_SRC1_ALPHA 0x1A + +#define GEN6_BLENDFUNCTION_ADD 0 +#define GEN6_BLENDFUNCTION_SUBTRACT 1 +#define GEN6_BLENDFUNCTION_REVERSE_SUBTRACT 2 +#define GEN6_BLENDFUNCTION_MIN 3 +#define GEN6_BLENDFUNCTION_MAX 4 + +#define GEN6_ALPHATEST_FORMAT_UNORM8 0 +#define GEN6_ALPHATEST_FORMAT_FLOAT32 1 + +#define GEN6_CHROMAKEY_KILL_ON_ANY_MATCH 0 +#define GEN6_CHROMAKEY_REPLACE_BLACK 1 + +#define GEN6_CLIP_API_OGL 0 +#define GEN6_CLIP_API_DX 1 + +#define GEN6_CLIPMODE_NORMAL 0 +#define GEN6_CLIPMODE_CLIP_ALL 1 +#define GEN6_CLIPMODE_CLIP_NON_REJECTED 2 +#define GEN6_CLIPMODE_REJECT_ALL 3 +#define GEN6_CLIPMODE_ACCEPT_ALL 4 + +#define GEN6_CLIP_NDCSPACE 0 +#define GEN6_CLIP_SCREENSPACE 1 + +#define GEN6_COMPAREFUNCTION_ALWAYS 0 +#define GEN6_COMPAREFUNCTION_NEVER 1 +#define GEN6_COMPAREFUNCTION_LESS 2 +#define GEN6_COMPAREFUNCTION_EQUAL 3 +#define GEN6_COMPAREFUNCTION_LEQUAL 4 +#define GEN6_COMPAREFUNCTION_GREATER 5 
+#define GEN6_COMPAREFUNCTION_NOTEQUAL 6 +#define GEN6_COMPAREFUNCTION_GEQUAL 7 + +#define GEN6_COVERAGE_PIXELS_HALF 0 +#define GEN6_COVERAGE_PIXELS_1 1 +#define GEN6_COVERAGE_PIXELS_2 2 +#define GEN6_COVERAGE_PIXELS_4 3 + +#define GEN6_CULLMODE_BOTH 0 +#define GEN6_CULLMODE_NONE 1 +#define GEN6_CULLMODE_FRONT 2 +#define GEN6_CULLMODE_BACK 3 + +#define GEN6_DEFAULTCOLOR_R8G8B8A8_UNORM 0 +#define GEN6_DEFAULTCOLOR_R32G32B32A32_FLOAT 1 + +#define GEN6_DEPTHFORMAT_D32_FLOAT_S8X24_UINT 0 +#define GEN6_DEPTHFORMAT_D32_FLOAT 1 +#define GEN6_DEPTHFORMAT_D24_UNORM_S8_UINT 2 +#define GEN6_DEPTHFORMAT_D16_UNORM 5 + +#define GEN6_FLOATING_POINT_IEEE_754 0 +#define GEN6_FLOATING_POINT_NON_IEEE_754 1 + +#define GEN6_FRONTWINDING_CW 0 +#define GEN6_FRONTWINDING_CCW 1 + +#define GEN6_INDEX_BYTE 0 +#define GEN6_INDEX_WORD 1 +#define GEN6_INDEX_DWORD 2 + +#define GEN6_LOGICOPFUNCTION_CLEAR 0 +#define GEN6_LOGICOPFUNCTION_NOR 1 +#define GEN6_LOGICOPFUNCTION_AND_INVERTED 2 +#define GEN6_LOGICOPFUNCTION_COPY_INVERTED 3 +#define GEN6_LOGICOPFUNCTION_AND_REVERSE 4 +#define GEN6_LOGICOPFUNCTION_INVERT 5 +#define GEN6_LOGICOPFUNCTION_XOR 6 +#define GEN6_LOGICOPFUNCTION_NAND 7 +#define GEN6_LOGICOPFUNCTION_AND 8 +#define GEN6_LOGICOPFUNCTION_EQUIV 9 +#define GEN6_LOGICOPFUNCTION_NOOP 10 +#define GEN6_LOGICOPFUNCTION_OR_INVERTED 11 +#define GEN6_LOGICOPFUNCTION_COPY 12 +#define GEN6_LOGICOPFUNCTION_OR_REVERSE 13 +#define GEN6_LOGICOPFUNCTION_OR 14 +#define GEN6_LOGICOPFUNCTION_SET 15 + +#define GEN6_MAPFILTER_NEAREST 0x0 +#define GEN6_MAPFILTER_LINEAR 0x1 +#define GEN6_MAPFILTER_ANISOTROPIC 0x2 + +#define GEN6_MIPFILTER_NONE 0 +#define GEN6_MIPFILTER_NEAREST 1 +#define GEN6_MIPFILTER_LINEAR 3 + +#define GEN6_POLYGON_FRONT_FACING 0 +#define GEN6_POLYGON_BACK_FACING 1 + +#define GEN6_PREFILTER_ALWAYS 0x0 +#define GEN6_PREFILTER_NEVER 0x1 +#define GEN6_PREFILTER_LESS 0x2 +#define GEN6_PREFILTER_EQUAL 0x3 +#define GEN6_PREFILTER_LEQUAL 0x4 +#define GEN6_PREFILTER_GREATER 0x5 +#define 
GEN6_PREFILTER_NOTEQUAL 0x6 +#define GEN6_PREFILTER_GEQUAL 0x7 + +#define GEN6_PROVOKING_VERTEX_0 0 +#define GEN6_PROVOKING_VERTEX_1 1 +#define GEN6_PROVOKING_VERTEX_2 2 + +#define GEN6_RASTRULE_UPPER_LEFT 0 +#define GEN6_RASTRULE_UPPER_RIGHT 1 + +#define GEN6_RENDERTARGET_CLAMPRANGE_UNORM 0 +#define GEN6_RENDERTARGET_CLAMPRANGE_SNORM 1 +#define GEN6_RENDERTARGET_CLAMPRANGE_FORMAT 2 + +#define GEN6_STENCILOP_KEEP 0 +#define GEN6_STENCILOP_ZERO 1 +#define GEN6_STENCILOP_REPLACE 2 +#define GEN6_STENCILOP_INCRSAT 3 +#define GEN6_STENCILOP_DECRSAT 4 +#define GEN6_STENCILOP_INCR 5 +#define GEN6_STENCILOP_DECR 6 +#define GEN6_STENCILOP_INVERT 7 + +#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT 1 + +#define GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT 0x000 +#define GEN6_SURFACEFORMAT_R32G32B32A32_SINT 0x001 +#define GEN6_SURFACEFORMAT_R32G32B32A32_UINT 0x002 +#define GEN6_SURFACEFORMAT_R32G32B32A32_UNORM 0x003 +#define GEN6_SURFACEFORMAT_R32G32B32A32_SNORM 0x004 +#define GEN6_SURFACEFORMAT_R64G64_FLOAT 0x005 +#define GEN6_SURFACEFORMAT_R32G32B32X32_FLOAT 0x006 +#define GEN6_SURFACEFORMAT_R32G32B32A32_SSCALED 0x007 +#define GEN6_SURFACEFORMAT_R32G32B32A32_USCALED 0x008 +#define GEN6_SURFACEFORMAT_R32G32B32_FLOAT 0x040 +#define GEN6_SURFACEFORMAT_R32G32B32_SINT 0x041 +#define GEN6_SURFACEFORMAT_R32G32B32_UINT 0x042 +#define GEN6_SURFACEFORMAT_R32G32B32_UNORM 0x043 +#define GEN6_SURFACEFORMAT_R32G32B32_SNORM 0x044 +#define GEN6_SURFACEFORMAT_R32G32B32_SSCALED 0x045 +#define GEN6_SURFACEFORMAT_R32G32B32_USCALED 0x046 +#define GEN6_SURFACEFORMAT_R16G16B16A16_UNORM 0x080 +#define GEN6_SURFACEFORMAT_R16G16B16A16_SNORM 0x081 +#define GEN6_SURFACEFORMAT_R16G16B16A16_SINT 0x082 +#define GEN6_SURFACEFORMAT_R16G16B16A16_UINT 0x083 +#define GEN6_SURFACEFORMAT_R16G16B16A16_FLOAT 0x084 +#define GEN6_SURFACEFORMAT_R32G32_FLOAT 0x085 +#define GEN6_SURFACEFORMAT_R32G32_SINT 0x086 +#define GEN6_SURFACEFORMAT_R32G32_UINT 0x087 +#define 
GEN6_SURFACEFORMAT_R32_FLOAT_X8X24_TYPELESS 0x088 +#define GEN6_SURFACEFORMAT_X32_TYPELESS_G8X24_UINT 0x089 +#define GEN6_SURFACEFORMAT_L32A32_FLOAT 0x08A +#define GEN6_SURFACEFORMAT_R32G32_UNORM 0x08B +#define GEN6_SURFACEFORMAT_R32G32_SNORM 0x08C +#define GEN6_SURFACEFORMAT_R64_FLOAT 0x08D +#define GEN6_SURFACEFORMAT_R16G16B16X16_UNORM 0x08E +#define GEN6_SURFACEFORMAT_R16G16B16X16_FLOAT 0x08F +#define GEN6_SURFACEFORMAT_A32X32_FLOAT 0x090 +#define GEN6_SURFACEFORMAT_L32X32_FLOAT 0x091 +#define GEN6_SURFACEFORMAT_I32X32_FLOAT 0x092 +#define GEN6_SURFACEFORMAT_R16G16B16A16_SSCALED 0x093 +#define GEN6_SURFACEFORMAT_R16G16B16A16_USCALED 0x094 +#define GEN6_SURFACEFORMAT_R32G32_SSCALED 0x095 +#define GEN6_SURFACEFORMAT_R32G32_USCALED 0x096 +#define GEN6_SURFACEFORMAT_B8G8R8A8_UNORM 0x0C0 +#define GEN6_SURFACEFORMAT_B8G8R8A8_UNORM_SRGB 0x0C1 +#define GEN6_SURFACEFORMAT_R10G10B10A2_UNORM 0x0C2 +#define GEN6_SURFACEFORMAT_R10G10B10A2_UNORM_SRGB 0x0C3 +#define GEN6_SURFACEFORMAT_R10G10B10A2_UINT 0x0C4 +#define GEN6_SURFACEFORMAT_R10G10B10_SNORM_A2_UNORM 0x0C5 +#define GEN6_SURFACEFORMAT_R8G8B8A8_UNORM 0x0C7 +#define GEN6_SURFACEFORMAT_R8G8B8A8_UNORM_SRGB 0x0C8 +#define GEN6_SURFACEFORMAT_R8G8B8A8_SNORM 0x0C9 +#define GEN6_SURFACEFORMAT_R8G8B8A8_SINT 0x0CA +#define GEN6_SURFACEFORMAT_R8G8B8A8_UINT 0x0CB +#define GEN6_SURFACEFORMAT_R16G16_UNORM 0x0CC +#define GEN6_SURFACEFORMAT_R16G16_SNORM 0x0CD +#define GEN6_SURFACEFORMAT_R16G16_SINT 0x0CE +#define GEN6_SURFACEFORMAT_R16G16_UINT 0x0CF +#define GEN6_SURFACEFORMAT_R16G16_FLOAT 0x0D0 +#define GEN6_SURFACEFORMAT_B10G10R10A2_UNORM 0x0D1 +#define GEN6_SURFACEFORMAT_B10G10R10A2_UNORM_SRGB 0x0D2 +#define GEN6_SURFACEFORMAT_R11G11B10_FLOAT 0x0D3 +#define GEN6_SURFACEFORMAT_R32_SINT 0x0D6 +#define GEN6_SURFACEFORMAT_R32_UINT 0x0D7 +#define GEN6_SURFACEFORMAT_R32_FLOAT 0x0D8 +#define GEN6_SURFACEFORMAT_R24_UNORM_X8_TYPELESS 0x0D9 +#define GEN6_SURFACEFORMAT_X24_TYPELESS_G8_UINT 0x0DA +#define GEN6_SURFACEFORMAT_L16A16_UNORM 0x0DF 
+#define GEN6_SURFACEFORMAT_I24X8_UNORM 0x0E0 +#define GEN6_SURFACEFORMAT_L24X8_UNORM 0x0E1 +#define GEN6_SURFACEFORMAT_A24X8_UNORM 0x0E2 +#define GEN6_SURFACEFORMAT_I32_FLOAT 0x0E3 +#define GEN6_SURFACEFORMAT_L32_FLOAT 0x0E4 +#define GEN6_SURFACEFORMAT_A32_FLOAT 0x0E5 +#define GEN6_SURFACEFORMAT_B8G8R8X8_UNORM 0x0E9 +#define GEN6_SURFACEFORMAT_B8G8R8X8_UNORM_SRGB 0x0EA +#define GEN6_SURFACEFORMAT_R8G8B8X8_UNORM 0x0EB +#define GEN6_SURFACEFORMAT_R8G8B8X8_UNORM_SRGB 0x0EC +#define GEN6_SURFACEFORMAT_R9G9B9E5_SHAREDEXP 0x0ED +#define GEN6_SURFACEFORMAT_B10G10R10X2_UNORM 0x0EE +#define GEN6_SURFACEFORMAT_L16A16_FLOAT 0x0F0 +#define GEN6_SURFACEFORMAT_R32_UNORM 0x0F1 +#define GEN6_SURFACEFORMAT_R32_SNORM 0x0F2 +#define GEN6_SURFACEFORMAT_R10G10B10X2_USCALED 0x0F3 +#define GEN6_SURFACEFORMAT_R8G8B8A8_SSCALED 0x0F4 +#define GEN6_SURFACEFORMAT_R8G8B8A8_USCALED 0x0F5 +#define GEN6_SURFACEFORMAT_R16G16_SSCALED 0x0F6 +#define GEN6_SURFACEFORMAT_R16G16_USCALED 0x0F7 +#define GEN6_SURFACEFORMAT_R32_SSCALED 0x0F8 +#define GEN6_SURFACEFORMAT_R32_USCALED 0x0F9 +#define GEN6_SURFACEFORMAT_B5G6R5_UNORM 0x100 +#define GEN6_SURFACEFORMAT_B5G6R5_UNORM_SRGB 0x101 +#define GEN6_SURFACEFORMAT_B5G5R5A1_UNORM 0x102 +#define GEN6_SURFACEFORMAT_B5G5R5A1_UNORM_SRGB 0x103 +#define GEN6_SURFACEFORMAT_B4G4R4A4_UNORM 0x104 +#define GEN6_SURFACEFORMAT_B4G4R4A4_UNORM_SRGB 0x105 +#define GEN6_SURFACEFORMAT_R8G8_UNORM 0x106 +#define GEN6_SURFACEFORMAT_R8G8_SNORM 0x107 +#define GEN6_SURFACEFORMAT_R8G8_SINT 0x108 +#define GEN6_SURFACEFORMAT_R8G8_UINT 0x109 +#define GEN6_SURFACEFORMAT_R16_UNORM 0x10A +#define GEN6_SURFACEFORMAT_R16_SNORM 0x10B +#define GEN6_SURFACEFORMAT_R16_SINT 0x10C +#define GEN6_SURFACEFORMAT_R16_UINT 0x10D +#define GEN6_SURFACEFORMAT_R16_FLOAT 0x10E +#define GEN6_SURFACEFORMAT_I16_UNORM 0x111 +#define GEN6_SURFACEFORMAT_L16_UNORM 0x112 +#define GEN6_SURFACEFORMAT_A16_UNORM 0x113 +#define GEN6_SURFACEFORMAT_L8A8_UNORM 0x114 +#define GEN6_SURFACEFORMAT_I16_FLOAT 0x115 +#define 
GEN6_SURFACEFORMAT_L16_FLOAT 0x116 +#define GEN6_SURFACEFORMAT_A16_FLOAT 0x117 +#define GEN6_SURFACEFORMAT_R5G5_SNORM_B6_UNORM 0x119 +#define GEN6_SURFACEFORMAT_B5G5R5X1_UNORM 0x11A +#define GEN6_SURFACEFORMAT_B5G5R5X1_UNORM_SRGB 0x11B +#define GEN6_SURFACEFORMAT_R8G8_SSCALED 0x11C +#define GEN6_SURFACEFORMAT_R8G8_USCALED 0x11D +#define GEN6_SURFACEFORMAT_R16_SSCALED 0x11E +#define GEN6_SURFACEFORMAT_R16_USCALED 0x11F +#define GEN6_SURFACEFORMAT_R8_UNORM 0x140 +#define GEN6_SURFACEFORMAT_R8_SNORM 0x141 +#define GEN6_SURFACEFORMAT_R8_SINT 0x142 +#define GEN6_SURFACEFORMAT_R8_UINT 0x143 +#define GEN6_SURFACEFORMAT_A8_UNORM 0x144 +#define GEN6_SURFACEFORMAT_I8_UNORM 0x145 +#define GEN6_SURFACEFORMAT_L8_UNORM 0x146 +#define GEN6_SURFACEFORMAT_P4A4_UNORM 0x147 +#define GEN6_SURFACEFORMAT_A4P4_UNORM 0x148 +#define GEN6_SURFACEFORMAT_R8_SSCALED 0x149 +#define GEN6_SURFACEFORMAT_R8_USCALED 0x14A +#define GEN6_SURFACEFORMAT_R1_UINT 0x181 +#define GEN6_SURFACEFORMAT_YCRCB_NORMAL 0x182 +#define GEN6_SURFACEFORMAT_YCRCB_SWAPUVY 0x183 +#define GEN6_SURFACEFORMAT_BC1_UNORM 0x186 +#define GEN6_SURFACEFORMAT_BC2_UNORM 0x187 +#define GEN6_SURFACEFORMAT_BC3_UNORM 0x188 +#define GEN6_SURFACEFORMAT_BC4_UNORM 0x189 +#define GEN6_SURFACEFORMAT_BC5_UNORM 0x18A +#define GEN6_SURFACEFORMAT_BC1_UNORM_SRGB 0x18B +#define GEN6_SURFACEFORMAT_BC2_UNORM_SRGB 0x18C +#define GEN6_SURFACEFORMAT_BC3_UNORM_SRGB 0x18D +#define GEN6_SURFACEFORMAT_MONO8 0x18E +#define GEN6_SURFACEFORMAT_YCRCB_SWAPUV 0x18F +#define GEN6_SURFACEFORMAT_YCRCB_SWAPY 0x190 +#define GEN6_SURFACEFORMAT_DXT1_RGB 0x191 +#define GEN6_SURFACEFORMAT_FXT1 0x192 +#define GEN6_SURFACEFORMAT_R8G8B8_UNORM 0x193 +#define GEN6_SURFACEFORMAT_R8G8B8_SNORM 0x194 +#define GEN6_SURFACEFORMAT_R8G8B8_SSCALED 0x195 +#define GEN6_SURFACEFORMAT_R8G8B8_USCALED 0x196 +#define GEN6_SURFACEFORMAT_R64G64B64A64_FLOAT 0x197 +#define GEN6_SURFACEFORMAT_R64G64B64_FLOAT 0x198 +#define GEN6_SURFACEFORMAT_BC4_SNORM 0x199 +#define GEN6_SURFACEFORMAT_BC5_SNORM 
0x19A +#define GEN6_SURFACEFORMAT_R16G16B16_UNORM 0x19C +#define GEN6_SURFACEFORMAT_R16G16B16_SNORM 0x19D +#define GEN6_SURFACEFORMAT_R16G16B16_SSCALED 0x19E +#define GEN6_SURFACEFORMAT_R16G16B16_USCALED 0x19F + +#define GEN6_SURFACERETURNFORMAT_FLOAT32 0 +#define GEN6_SURFACERETURNFORMAT_S1 1 + +#define GEN6_SURFACE_1D 0 +#define GEN6_SURFACE_2D 1 +#define GEN6_SURFACE_3D 2 +#define GEN6_SURFACE_CUBE 3 +#define GEN6_SURFACE_BUFFER 4 +#define GEN6_SURFACE_NULL 7 + +#define GEN6_BORDER_COLOR_MODE_DEFAULT 0 +#define GEN6_BORDER_COLOR_MODE_LEGACY 1 + +#define GEN6_TEXCOORDMODE_WRAP 0 +#define GEN6_TEXCOORDMODE_MIRROR 1 +#define GEN6_TEXCOORDMODE_CLAMP 2 +#define GEN6_TEXCOORDMODE_CUBE 3 +#define GEN6_TEXCOORDMODE_CLAMP_BORDER 4 +#define GEN6_TEXCOORDMODE_MIRROR_ONCE 5 + +#define GEN6_THREAD_PRIORITY_NORMAL 0 +#define GEN6_THREAD_PRIORITY_HIGH 1 + +#define GEN6_TILEWALK_XMAJOR 0 +#define GEN6_TILEWALK_YMAJOR 1 + +#define GEN6_VERTEX_SUBPIXEL_PRECISION_8BITS 0 +#define GEN6_VERTEX_SUBPIXEL_PRECISION_4BITS 1 + +#define GEN6_VERTEXBUFFER_ACCESS_VERTEXDATA 0 +#define GEN6_VERTEXBUFFER_ACCESS_INSTANCEDATA 1 + +#define GEN6_VFCOMPONENT_NOSTORE 0 +#define GEN6_VFCOMPONENT_STORE_SRC 1 +#define GEN6_VFCOMPONENT_STORE_0 2 +#define GEN6_VFCOMPONENT_STORE_1_FLT 3 +#define GEN6_VFCOMPONENT_STORE_1_INT 4 +#define GEN6_VFCOMPONENT_STORE_VID 5 +#define GEN6_VFCOMPONENT_STORE_IID 6 +#define GEN6_VFCOMPONENT_STORE_PID 7 + + + +/* Execution Unit (EU) defines + */ + +#define GEN6_ALIGN_1 0 +#define GEN6_ALIGN_16 1 + +#define GEN6_ADDRESS_DIRECT 0 +#define GEN6_ADDRESS_REGISTER_INDIRECT_REGISTER 1 + +#define GEN6_CHANNEL_X 0 +#define GEN6_CHANNEL_Y 1 +#define GEN6_CHANNEL_Z 2 +#define GEN6_CHANNEL_W 3 + +#define GEN6_COMPRESSION_NONE 0 +#define GEN6_COMPRESSION_2NDHALF 1 +#define GEN6_COMPRESSION_COMPRESSED 2 + +#define GEN6_CONDITIONAL_NONE 0 +#define GEN6_CONDITIONAL_Z 1 +#define GEN6_CONDITIONAL_NZ 2 +#define GEN6_CONDITIONAL_EQ 1 /* Z */ +#define GEN6_CONDITIONAL_NEQ 2 /* NZ */ 
+#define GEN6_CONDITIONAL_G 3 +#define GEN6_CONDITIONAL_GE 4 +#define GEN6_CONDITIONAL_L 5 +#define GEN6_CONDITIONAL_LE 6 +#define GEN6_CONDITIONAL_C 7 +#define GEN6_CONDITIONAL_O 8 + +#define GEN6_DEBUG_NONE 0 +#define GEN6_DEBUG_BREAKPOINT 1 + +#define GEN6_DEPENDENCY_NORMAL 0 +#define GEN6_DEPENDENCY_NOTCLEARED 1 +#define GEN6_DEPENDENCY_NOTCHECKED 2 +#define GEN6_DEPENDENCY_DISABLE 3 + +#define GEN6_EXECUTE_1 0 +#define GEN6_EXECUTE_2 1 +#define GEN6_EXECUTE_4 2 +#define GEN6_EXECUTE_8 3 +#define GEN6_EXECUTE_16 4 +#define GEN6_EXECUTE_32 5 + +#define GEN6_HORIZONTAL_STRIDE_0 0 +#define GEN6_HORIZONTAL_STRIDE_1 1 +#define GEN6_HORIZONTAL_STRIDE_2 2 +#define GEN6_HORIZONTAL_STRIDE_4 3 + +#define GEN6_INSTRUCTION_NORMAL 0 +#define GEN6_INSTRUCTION_SATURATE 1 + +#define GEN6_MASK_ENABLE 0 +#define GEN6_MASK_DISABLE 1 + +#define GEN6_OPCODE_MOV 1 +#define GEN6_OPCODE_SEL 2 +#define GEN6_OPCODE_NOT 4 +#define GEN6_OPCODE_AND 5 +#define GEN6_OPCODE_OR 6 +#define GEN6_OPCODE_XOR 7 +#define GEN6_OPCODE_SHR 8 +#define GEN6_OPCODE_SHL 9 +#define GEN6_OPCODE_RSR 10 +#define GEN6_OPCODE_RSL 11 +#define GEN6_OPCODE_ASR 12 +#define GEN6_OPCODE_CMP 16 +#define GEN6_OPCODE_JMPI 32 +#define GEN6_OPCODE_IF 34 +#define GEN6_OPCODE_IFF 35 +#define GEN6_OPCODE_ELSE 36 +#define GEN6_OPCODE_ENDIF 37 +#define GEN6_OPCODE_DO 38 +#define GEN6_OPCODE_WHILE 39 +#define GEN6_OPCODE_BREAK 40 +#define GEN6_OPCODE_CONTINUE 41 +#define GEN6_OPCODE_HALT 42 +#define GEN6_OPCODE_MSAVE 44 +#define GEN6_OPCODE_MRESTORE 45 +#define GEN6_OPCODE_PUSH 46 +#define GEN6_OPCODE_POP 47 +#define GEN6_OPCODE_WAIT 48 +#define GEN6_OPCODE_SEND 49 +#define GEN6_OPCODE_ADD 64 +#define GEN6_OPCODE_MUL 65 +#define GEN6_OPCODE_AVG 66 +#define GEN6_OPCODE_FRC 67 +#define GEN6_OPCODE_RNDU 68 +#define GEN6_OPCODE_RNDD 69 +#define GEN6_OPCODE_RNDE 70 +#define GEN6_OPCODE_RNDZ 71 +#define GEN6_OPCODE_MAC 72 +#define GEN6_OPCODE_MACH 73 +#define GEN6_OPCODE_LZD 74 +#define GEN6_OPCODE_SAD2 80 +#define GEN6_OPCODE_SADA2 
81 +#define GEN6_OPCODE_DP4 84 +#define GEN6_OPCODE_DPH 85 +#define GEN6_OPCODE_DP3 86 +#define GEN6_OPCODE_DP2 87 +#define GEN6_OPCODE_DPA2 88 +#define GEN6_OPCODE_LINE 89 +#define GEN6_OPCODE_NOP 126 + +#define GEN6_PREDICATE_NONE 0 +#define GEN6_PREDICATE_NORMAL 1 +#define GEN6_PREDICATE_ALIGN1_ANYV 2 +#define GEN6_PREDICATE_ALIGN1_ALLV 3 +#define GEN6_PREDICATE_ALIGN1_ANY2H 4 +#define GEN6_PREDICATE_ALIGN1_ALL2H 5 +#define GEN6_PREDICATE_ALIGN1_ANY4H 6 +#define GEN6_PREDICATE_ALIGN1_ALL4H 7 +#define GEN6_PREDICATE_ALIGN1_ANY8H 8 +#define GEN6_PREDICATE_ALIGN1_ALL8H 9 +#define GEN6_PREDICATE_ALIGN1_ANY16H 10 +#define GEN6_PREDICATE_ALIGN1_ALL16H 11 +#define GEN6_PREDICATE_ALIGN16_REPLICATE_X 2 +#define GEN6_PREDICATE_ALIGN16_REPLICATE_Y 3 +#define GEN6_PREDICATE_ALIGN16_REPLICATE_Z 4 +#define GEN6_PREDICATE_ALIGN16_REPLICATE_W 5 +#define GEN6_PREDICATE_ALIGN16_ANY4H 6 +#define GEN6_PREDICATE_ALIGN16_ALL4H 7 + +#define GEN6_ARCHITECTURE_REGISTER_FILE 0 +#define GEN6_GENERAL_REGISTER_FILE 1 +#define GEN6_MESSAGE_REGISTER_FILE 2 +#define GEN6_IMMEDIATE_VALUE 3 + +#define GEN6_REGISTER_TYPE_UD 0 +#define GEN6_REGISTER_TYPE_D 1 +#define GEN6_REGISTER_TYPE_UW 2 +#define GEN6_REGISTER_TYPE_W 3 +#define GEN6_REGISTER_TYPE_UB 4 +#define GEN6_REGISTER_TYPE_B 5 +#define GEN6_REGISTER_TYPE_VF 5 /* packed float vector, immediates only? 
*/ +#define GEN6_REGISTER_TYPE_HF 6 +#define GEN6_REGISTER_TYPE_V 6 /* packed int vector, immediates only, uword dest only */ +#define GEN6_REGISTER_TYPE_F 7 + +#define GEN6_ARF_NULL 0x00 +#define GEN6_ARF_ADDRESS 0x10 +#define GEN6_ARF_ACCUMULATOR 0x20 +#define GEN6_ARF_FLAG 0x30 +#define GEN6_ARF_MASK 0x40 +#define GEN6_ARF_MASK_STACK 0x50 +#define GEN6_ARF_MASK_STACK_DEPTH 0x60 +#define GEN6_ARF_STATE 0x70 +#define GEN6_ARF_CONTROL 0x80 +#define GEN6_ARF_NOTIFICATION_COUNT 0x90 +#define GEN6_ARF_IP 0xA0 + +#define GEN6_AMASK 0 +#define GEN6_IMASK 1 +#define GEN6_LMASK 2 +#define GEN6_CMASK 3 + + + +#define GEN6_THREAD_NORMAL 0 +#define GEN6_THREAD_ATOMIC 1 +#define GEN6_THREAD_SWITCH 2 + +#define GEN6_VERTICAL_STRIDE_0 0 +#define GEN6_VERTICAL_STRIDE_1 1 +#define GEN6_VERTICAL_STRIDE_2 2 +#define GEN6_VERTICAL_STRIDE_4 3 +#define GEN6_VERTICAL_STRIDE_8 4 +#define GEN6_VERTICAL_STRIDE_16 5 +#define GEN6_VERTICAL_STRIDE_32 6 +#define GEN6_VERTICAL_STRIDE_64 7 +#define GEN6_VERTICAL_STRIDE_128 8 +#define GEN6_VERTICAL_STRIDE_256 9 +#define GEN6_VERTICAL_STRIDE_ONE_DIMENSIONAL 0xF + +#define GEN6_WIDTH_1 0 +#define GEN6_WIDTH_2 1 +#define GEN6_WIDTH_4 2 +#define GEN6_WIDTH_8 3 +#define GEN6_WIDTH_16 4 + +#define GEN6_STATELESS_BUFFER_BOUNDARY_1K 0 +#define GEN6_STATELESS_BUFFER_BOUNDARY_2K 1 +#define GEN6_STATELESS_BUFFER_BOUNDARY_4K 2 +#define GEN6_STATELESS_BUFFER_BOUNDARY_8K 3 +#define GEN6_STATELESS_BUFFER_BOUNDARY_16K 4 +#define GEN6_STATELESS_BUFFER_BOUNDARY_32K 5 +#define GEN6_STATELESS_BUFFER_BOUNDARY_64K 6 +#define GEN6_STATELESS_BUFFER_BOUNDARY_128K 7 +#define GEN6_STATELESS_BUFFER_BOUNDARY_256K 8 +#define GEN6_STATELESS_BUFFER_BOUNDARY_512K 9 +#define GEN6_STATELESS_BUFFER_BOUNDARY_1M 10 +#define GEN6_STATELESS_BUFFER_BOUNDARY_2M 11 + +#define GEN6_POLYGON_FACING_FRONT 0 +#define GEN6_POLYGON_FACING_BACK 1 + +#define GEN6_MESSAGE_TARGET_NULL 0 +#define GEN6_MESSAGE_TARGET_MATH 1 +#define GEN6_MESSAGE_TARGET_SAMPLER 2 +#define GEN6_MESSAGE_TARGET_GATEWAY 3 
+#define GEN6_MESSAGE_TARGET_DATAPORT_READ 4 +#define GEN6_MESSAGE_TARGET_DATAPORT_WRITE 5 +#define GEN6_MESSAGE_TARGET_URB 6 +#define GEN6_MESSAGE_TARGET_THREAD_SPAWNER 7 + +#define GEN6_SAMPLER_RETURN_FORMAT_FLOAT32 0 +#define GEN6_SAMPLER_RETURN_FORMAT_UINT32 2 +#define GEN6_SAMPLER_RETURN_FORMAT_SINT32 3 + +#define GEN6_SAMPLER_MESSAGE_SIMD8_SAMPLE 0 +#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE 0 +#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_BIAS 0 +#define GEN6_SAMPLER_MESSAGE_SIMD8_KILLPIX 1 +#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_LOD 1 +#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_LOD 1 +#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_GRADIENTS 2 +#define GEN6_SAMPLER_MESSAGE_SIMD8_SAMPLE_GRADIENTS 2 +#define GEN6_SAMPLER_MESSAGE_SIMD4X2_SAMPLE_COMPARE 0 +#define GEN6_SAMPLER_MESSAGE_SIMD16_SAMPLE_COMPARE 2 +#define GEN6_SAMPLER_MESSAGE_SIMD4X2_RESINFO 2 +#define GEN6_SAMPLER_MESSAGE_SIMD8_RESINFO 2 +#define GEN6_SAMPLER_MESSAGE_SIMD16_RESINFO 2 +#define GEN6_SAMPLER_MESSAGE_SIMD4X2_LD 3 +#define GEN6_SAMPLER_MESSAGE_SIMD8_LD 3 +#define GEN6_SAMPLER_MESSAGE_SIMD16_LD 3 + +#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDLOW 0 +#define GEN6_DATAPORT_OWORD_BLOCK_1_OWORDHIGH 1 +#define GEN6_DATAPORT_OWORD_BLOCK_2_OWORDS 2 +#define GEN6_DATAPORT_OWORD_BLOCK_4_OWORDS 3 +#define GEN6_DATAPORT_OWORD_BLOCK_8_OWORDS 4 + +#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_1OWORD 0 +#define GEN6_DATAPORT_OWORD_DUAL_BLOCK_4OWORDS 2 + +#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_8DWORDS 2 +#define GEN6_DATAPORT_DWORD_SCATTERED_BLOCK_16DWORDS 3 + +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_BLOCK_READ 0 +#define GEN6_DATAPORT_READ_MESSAGE_OWORD_DUAL_BLOCK_READ 1 +#define GEN6_DATAPORT_READ_MESSAGE_DWORD_BLOCK_READ 2 +#define GEN6_DATAPORT_READ_MESSAGE_DWORD_SCATTERED_READ 3 + +#define GEN6_DATAPORT_READ_TARGET_DATA_CACHE 0 +#define GEN6_DATAPORT_READ_TARGET_RENDER_CACHE 1 +#define GEN6_DATAPORT_READ_TARGET_SAMPLER_CACHE 2 + +#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE 0 +#define 
GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD16_SINGLE_SOURCE_REPLICATED 1 +#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN01 2 +#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_DUAL_SOURCE_SUBSPAN23 3 +#define GEN6_DATAPORT_RENDER_TARGET_WRITE_SIMD8_SINGLE_SOURCE_SUBSPAN01 4 + +#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_BLOCK_WRITE 0 +#define GEN6_DATAPORT_WRITE_MESSAGE_OWORD_DUAL_BLOCK_WRITE 1 +#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_BLOCK_WRITE 2 +#define GEN6_DATAPORT_WRITE_MESSAGE_DWORD_SCATTERED_WRITE 3 +#define GEN6_DATAPORT_WRITE_MESSAGE_RENDER_TARGET_WRITE 4 +#define GEN6_DATAPORT_WRITE_MESSAGE_STREAMED_VERTEX_BUFFER_WRITE 5 +#define GEN6_DATAPORT_WRITE_MESSAGE_FLUSH_RENDER_CACHE 7 + +#define GEN6_MATH_FUNCTION_INV 1 +#define GEN6_MATH_FUNCTION_LOG 2 +#define GEN6_MATH_FUNCTION_EXP 3 +#define GEN6_MATH_FUNCTION_SQRT 4 +#define GEN6_MATH_FUNCTION_RSQ 5 +#define GEN6_MATH_FUNCTION_SIN 6 /* was 7 */ +#define GEN6_MATH_FUNCTION_COS 7 /* was 8 */ +#define GEN6_MATH_FUNCTION_SINCOS 8 /* was 6 */ +#define GEN6_MATH_FUNCTION_TAN 9 +#define GEN6_MATH_FUNCTION_POW 10 +#define GEN6_MATH_FUNCTION_INT_DIV_QUOTIENT_AND_REMAINDER 11 +#define GEN6_MATH_FUNCTION_INT_DIV_QUOTIENT 12 +#define GEN6_MATH_FUNCTION_INT_DIV_REMAINDER 13 + +#define GEN6_MATH_INTEGER_UNSIGNED 0 +#define GEN6_MATH_INTEGER_SIGNED 1 + +#define GEN6_MATH_PRECISION_FULL 0 +#define GEN6_MATH_PRECISION_PARTIAL 1 + +#define GEN6_MATH_SATURATE_NONE 0 +#define GEN6_MATH_SATURATE_SATURATE 1 + +#define GEN6_MATH_DATA_VECTOR 0 +#define GEN6_MATH_DATA_SCALAR 1 + +#define GEN6_URB_OPCODE_WRITE 0 + +#define GEN6_URB_SWIZZLE_NONE 0 +#define GEN6_URB_SWIZZLE_INTERLEAVE 1 +#define GEN6_URB_SWIZZLE_TRANSPOSE 2 + +#define GEN6_SCRATCH_SPACE_SIZE_1K 0 +#define GEN6_SCRATCH_SPACE_SIZE_2K 1 +#define GEN6_SCRATCH_SPACE_SIZE_4K 2 +#define GEN6_SCRATCH_SPACE_SIZE_8K 3 +#define GEN6_SCRATCH_SPACE_SIZE_16K 4 +#define GEN6_SCRATCH_SPACE_SIZE_32K 5 +#define GEN6_SCRATCH_SPACE_SIZE_64K 6 +#define 
GEN6_SCRATCH_SPACE_SIZE_128K 7 +#define GEN6_SCRATCH_SPACE_SIZE_256K 8 +#define GEN6_SCRATCH_SPACE_SIZE_512K 9 +#define GEN6_SCRATCH_SPACE_SIZE_1M 10 +#define GEN6_SCRATCH_SPACE_SIZE_2M 11 + +/* The hardware supports two different modes for border color. The + * default (OpenGL) mode uses floating-point color channels, while the + * legacy mode uses 4 bytes. + * + * More significantly, the legacy mode respects the components of the + * border color for channels not present in the source, (whereas the + * default mode will ignore the border color's alpha channel and use + * alpha==1 for an RGB source, for example). + * + * The legacy mode matches the semantics specified by the Render + * extension. + */ +struct gen6_sampler_default_border_color { + float color[4]; +}; + +struct gen6_sampler_legacy_border_color { + uint8_t color[4]; +}; + +struct gen6_sampler_state { + struct { + uint32_t shadow_function:3; + uint32_t lod_bias:11; + uint32_t min_filter:3; + uint32_t mag_filter:3; + uint32_t mip_filter:2; + uint32_t base_level:5; + uint32_t pad:1; + uint32_t lod_preclamp:1; + uint32_t border_color_mode:1; + uint32_t pad0:1; + uint32_t disable:1; + } ss0; + + struct { + uint32_t r_wrap_mode:3; + uint32_t t_wrap_mode:3; + uint32_t s_wrap_mode:3; + uint32_t pad:3; + uint32_t max_lod:10; + uint32_t min_lod:10; + } ss1; + + struct { + uint32_t border_color; + } ss2; + + struct { + uint32_t pad:19; + uint32_t max_aniso:3; + uint32_t chroma_key_mode:1; + uint32_t chroma_key_index:2; + uint32_t chroma_key_enable:1; + uint32_t monochrome_filter_width:3; + uint32_t monochrome_filter_height:3; + } ss3; +}; + +struct gen6_blend_state { + struct { + uint32_t dest_blend_factor:5; + uint32_t source_blend_factor:5; + uint32_t pad3:1; + uint32_t blend_func:3; + uint32_t pad2:1; + uint32_t ia_dest_blend_factor:5; + uint32_t ia_source_blend_factor:5; + uint32_t pad1:1; + uint32_t ia_blend_func:3; + uint32_t pad0:1; + uint32_t ia_blend_enable:1; + uint32_t blend_enable:1; + } blend0; + + 
struct { + uint32_t post_blend_clamp_enable:1; + uint32_t pre_blend_clamp_enable:1; + uint32_t clamp_range:2; + uint32_t pad0:4; + uint32_t x_dither_offset:2; + uint32_t y_dither_offset:2; + uint32_t dither_enable:1; + uint32_t alpha_test_func:3; + uint32_t alpha_test_enable:1; + uint32_t pad1:1; + uint32_t logic_op_func:4; + uint32_t logic_op_enable:1; + uint32_t pad2:1; + uint32_t write_disable_b:1; + uint32_t write_disable_g:1; + uint32_t write_disable_r:1; + uint32_t write_disable_a:1; + uint32_t pad3:1; + uint32_t alpha_to_coverage_dither:1; + uint32_t alpha_to_one:1; + uint32_t alpha_to_coverage:1; + } blend1; +}; + +struct gen6_color_calc_state { + struct { + uint32_t alpha_test_format:1; + uint32_t pad0:14; + uint32_t round_disable:1; + uint32_t bf_stencil_ref:8; + uint32_t stencil_ref:8; + } cc0; + + union { + float alpha_ref_f; + struct { + uint32_t ui:8; + uint32_t pad0:24; + } alpha_ref_fi; + } cc1; + + float constant_r; + float constant_g; + float constant_b; + float constant_a; +}; + +struct gen6_depth_stencil_state { + struct { + uint32_t pad0:3; + uint32_t bf_stencil_pass_depth_pass_op:3; + uint32_t bf_stencil_pass_depth_fail_op:3; + uint32_t bf_stencil_fail_op:3; + uint32_t bf_stencil_func:3; + uint32_t bf_stencil_enable:1; + uint32_t pad1:2; + uint32_t stencil_write_enable:1; + uint32_t stencil_pass_depth_pass_op:3; + uint32_t stencil_pass_depth_fail_op:3; + uint32_t stencil_fail_op:3; + uint32_t stencil_func:3; + uint32_t stencil_enable:1; + } ds0; + + struct { + uint32_t bf_stencil_write_mask:8; + uint32_t bf_stencil_test_mask:8; + uint32_t stencil_write_mask:8; + uint32_t stencil_test_mask:8; + } ds1; + + struct { + uint32_t pad0:26; + uint32_t depth_write_enable:1; + uint32_t depth_test_func:3; + uint32_t pad1:1; + uint32_t depth_test_enable:1; + } ds2; +}; + +struct gen6_surface_state { + struct { + uint32_t cube_pos_z:1; + uint32_t cube_neg_z:1; + uint32_t cube_pos_y:1; + uint32_t cube_neg_y:1; + uint32_t cube_pos_x:1; + uint32_t 
cube_neg_x:1; + uint32_t pad:3; + uint32_t render_cache_read_mode:1; + uint32_t mipmap_layout_mode:1; + uint32_t vert_line_stride_ofs:1; + uint32_t vert_line_stride:1; + uint32_t color_blend:1; + uint32_t writedisable_blue:1; + uint32_t writedisable_green:1; + uint32_t writedisable_red:1; + uint32_t writedisable_alpha:1; + uint32_t surface_format:9; + uint32_t data_return_format:1; + uint32_t pad0:1; + uint32_t surface_type:3; + } ss0; + + struct { + uint32_t base_addr; + } ss1; + + struct { + uint32_t render_target_rotation:2; + uint32_t mip_count:4; + uint32_t width:13; + uint32_t height:13; + } ss2; + + struct { + uint32_t tile_walk:1; + uint32_t tiled_surface:1; + uint32_t pad:1; + uint32_t pitch:18; + uint32_t depth:11; + } ss3; + + struct { + uint32_t pad:19; + uint32_t min_array_elt:9; + uint32_t min_lod:4; + } ss4; + + struct { + uint32_t pad:20; + uint32_t y_offset:4; + uint32_t pad2:1; + uint32_t x_offset:7; + } ss5; +}; + +/* Surface state DW0 */ +#define GEN6_SURFACE_RC_READ_WRITE (1 << 8) +#define GEN6_SURFACE_MIPLAYOUT_SHIFT 10 +#define GEN6_SURFACE_MIPMAPLAYOUT_BELOW 0 +#define GEN6_SURFACE_MIPMAPLAYOUT_RIGHT 1 +#define GEN6_SURFACE_CUBEFACE_ENABLES 0x3f +#define GEN6_SURFACE_BLEND_ENABLED (1 << 13) +#define GEN6_SURFACE_WRITEDISABLE_B_SHIFT 14 +#define GEN6_SURFACE_WRITEDISABLE_G_SHIFT 15 +#define GEN6_SURFACE_WRITEDISABLE_R_SHIFT 16 +#define GEN6_SURFACE_WRITEDISABLE_A_SHIFT 17 +#define GEN6_SURFACE_FORMAT_SHIFT 18 +#define GEN6_SURFACE_FORMAT_MASK INTEL_MASK(26, 18) + +#define GEN6_SURFACE_TYPE_SHIFT 29 +#define GEN6_SURFACE_TYPE_MASK GEN6_MASK(31, 29) +#define GEN6_SURFACE_1D 0 +#define GEN6_SURFACE_2D 1 +#define GEN6_SURFACE_3D 2 +#define GEN6_SURFACE_CUBE 3 +#define GEN6_SURFACE_BUFFER 4 +#define GEN6_SURFACE_NULL 7 + +/* Surface state DW2 */ +#define GEN6_SURFACE_HEIGHT_SHIFT 19 +#define GEN6_SURFACE_HEIGHT_MASK GEN6_MASK(31, 19) +#define GEN6_SURFACE_WIDTH_SHIFT 6 +#define GEN6_SURFACE_WIDTH_MASK GEN6_MASK(18, 6) +#define 
GEN6_SURFACE_LOD_SHIFT 2 +#define GEN6_SURFACE_LOD_MASK GEN6_MASK(5, 2) + +/* Surface state DW3 */ +#define GEN6_SURFACE_DEPTH_SHIFT 21 +#define GEN6_SURFACE_DEPTH_MASK GEN6_MASK(31, 21) +#define GEN6_SURFACE_PITCH_SHIFT 3 +#define GEN6_SURFACE_PITCH_MASK GEN6_MASK(19, 3) +#define GEN6_SURFACE_TILED (1 << 1) +#define GEN6_SURFACE_TILED_Y (1 << 0) + +/* Surface state DW4 */ +#define GEN6_SURFACE_MIN_LOD_SHIFT 28 +#define GEN6_SURFACE_MIN_LOD_MASK GEN6_MASK(31, 28) + +/* Surface state DW5 */ +#define GEN6_SURFACE_X_OFFSET_SHIFT 25 +#define GEN6_SURFACE_X_OFFSET_MASK GEN6_MASK(31, 25) +#define GEN6_SURFACE_Y_OFFSET_SHIFT 20 +#define GEN6_SURFACE_Y_OFFSET_MASK GEN6_MASK(23, 20) + +struct gen6_cc_viewport { + float min_depth; + float max_depth; +}; + +typedef enum { + SAMPLER_FILTER_NEAREST = 0, + SAMPLER_FILTER_BILINEAR, + FILTER_COUNT +} sampler_filter_t; + +typedef enum { + SAMPLER_EXTEND_NONE = 0, + SAMPLER_EXTEND_REPEAT, + SAMPLER_EXTEND_PAD, + SAMPLER_EXTEND_REFLECT, + EXTEND_COUNT +} sampler_extend_t; + +#endif diff --git a/src/sna/kgem.c b/src/sna/kgem.c new file mode 100644 index 00000000..0dee6e55 --- /dev/null +++ b/src/sna/kgem.c @@ -0,0 +1,1775 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_reg.h" + +#include <unistd.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <time.h> +#include <errno.h> +#include <fcntl.h> + +static inline void list_move(struct list *list, struct list *head) +{ + __list_del(list->prev, list->next); + list_add(list, head); +} + +static inline void list_replace(struct list *old, + struct list *new) +{ + new->next = old->next; + new->next->prev = new; + new->prev = old->prev; + new->prev->next = new; +} + +#define list_last_entry(ptr, type, member) \ + list_entry((ptr)->prev, type, member) + +#define list_for_each(pos, head) \ + for (pos = (head)->next; pos != (head); pos = pos->next) + + +#define DBG_NO_HW 0 +#define DBG_NO_VMAP 0 +#define DBG_NO_RELAXED_FENCING 0 +#define DBG_DUMP 0 + +#if DEBUG_KGEM +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +#define PAGE_SIZE 4096 + +struct kgem_partial_bo { + struct kgem_bo base; + uint32_t used, alloc; + uint32_t need_io : 1; + uint32_t write : 1; +}; + +static struct drm_i915_gem_exec_object2 _kgem_dummy_exec; + +static int gem_set_tiling(int fd, uint32_t handle, int tiling, int stride) +{ + struct drm_i915_gem_set_tiling set_tiling; + int ret; + + do { + set_tiling.handle = handle; + set_tiling.tiling_mode = tiling; + set_tiling.stride = stride; + + ret = ioctl(fd, DRM_IOCTL_I915_GEM_SET_TILING, &set_tiling); + 
} while (ret == -1 && (errno == EINTR || errno == EAGAIN)); + return set_tiling.tiling_mode; +} + +static void *gem_mmap(int fd, uint32_t handle, int size, int prot) +{ + struct drm_i915_gem_mmap_gtt mmap_arg; + void *ptr; + + DBG(("%s(handle=%d, size=%d, prot=%s)\n", __FUNCTION__, + handle, size, prot & PROT_WRITE ? "read/write" : "read-only")); + + mmap_arg.handle = handle; + if (drmIoctl(fd, DRM_IOCTL_I915_GEM_MMAP_GTT, &mmap_arg)) { + assert(0); + return NULL; + } + + ptr = mmap(0, size, prot, MAP_SHARED, fd, mmap_arg.offset); + if (ptr == MAP_FAILED) { + assert(0); + ptr = NULL; + } + + return ptr; +} + +static int gem_write(int fd, uint32_t handle, + int offset, int length, + const void *src) +{ + struct drm_i915_gem_pwrite pwrite; + + DBG(("%s(handle=%d, offset=%d, len=%d)\n", __FUNCTION__, + handle, offset, length)); + + pwrite.handle = handle; + pwrite.offset = offset; + pwrite.size = length; + pwrite.data_ptr = (uintptr_t)src; + return drmIoctl(fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite); +} + +static int gem_read(int fd, uint32_t handle, const void *dst, int length) +{ + struct drm_i915_gem_pread pread; + + DBG(("%s(handle=%d, len=%d)\n", __FUNCTION__, + handle, length)); + + pread.handle = handle; + pread.offset = 0; + pread.size = length; + pread.data_ptr = (uintptr_t)dst; + return drmIoctl(fd, DRM_IOCTL_I915_GEM_PREAD, &pread); +} + +Bool kgem_bo_write(struct kgem *kgem, struct kgem_bo *bo, + const void *data, int length) +{ + if (gem_write(kgem->fd, bo->handle, 0, length, data)) + return FALSE; + + _kgem_retire(kgem); + return TRUE; +} + +static uint32_t gem_create(int fd, int size) +{ + struct drm_i915_gem_create create; + +#if DEBUG_KGEM + assert((size & (PAGE_SIZE-1)) == 0); +#endif + + create.handle = 0; + create.size = size; + (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); + + return create.handle; +} + +static bool +kgem_busy(struct kgem *kgem, int handle) +{ + struct drm_i915_gem_busy busy; + + busy.handle = handle; + busy.busy = 
/* Return the index of the most significant set bit of @word,
 * i.e. floor(log2(word)).
 *
 * The original used x86 "bsr" inline assembly, which does not build on
 * non-x86 targets (and `asm` is rejected under strict -std modes).
 * Use the compiler builtin instead: __builtin_clzl is provided by the
 * same GCC-compatible compilers the old GCC-syntax asm required, and
 * works on every architecture.
 *
 * @word must be non-zero; the result is undefined for zero (the same
 * contract as bsr / __builtin_clzl).
 */
static inline unsigned long __fls(unsigned long word)
{
	return (unsigned long)(sizeof(word) * 8 - 1) - __builtin_clzl(word);
}
kgem->flush = 0; + + kgem->nbatch = 0; + kgem->nreloc = 0; + kgem->nexec = 0; + kgem->surface = ARRAY_SIZE(kgem->batch); + list_init(&kgem->partial); + list_init(&kgem->requests); + list_init(&kgem->active); + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) + list_init(&kgem->inactive[i]); + + kgem->next_request = __kgem_request_alloc(); + + kgem->has_vmap = 0; +#if defined(USE_VMAP) && defined(I915_PARAM_HAS_VMAP) + if (!DBG_NO_VMAP) { + drm_i915_getparam_t gp; + + gp.param = I915_PARAM_HAS_VMAP; + gp.value = &i; + kgem->has_vmap = + drmIoctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0 && + i > 0; + } +#endif + DBG(("%s: using vmap=%d\n", __FUNCTION__, kgem->has_vmap)); + + if (gen < 40) { + kgem->has_relaxed_fencing = 0; + if (!DBG_NO_RELAXED_FENCING) { + drm_i915_getparam_t gp; + + gp.param = I915_PARAM_HAS_RELAXED_FENCING; + gp.value = &i; + if (drmIoctl(kgem->fd, DRM_IOCTL_I915_GETPARAM, &gp) == 0) { + if (gen < 33) + kgem->has_relaxed_fencing = i >= 2; + else + kgem->has_relaxed_fencing = i > 0; + } + } + } else + kgem->has_relaxed_fencing = 1; + DBG(("%s: has relaxed fencing=%d\n", __FUNCTION__, + kgem->has_relaxed_fencing)); + + aperture.aper_available_size = 64*1024*1024; + (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_GET_APERTURE, &aperture); + + kgem->aperture_high = aperture.aper_available_size * 3/4; + kgem->aperture_low = aperture.aper_available_size * 1/4; + kgem->aperture = 0; + DBG(("%s: aperture low=%d, high=%d\n", __FUNCTION__, + kgem->aperture_low, kgem->aperture_high)); + + i = 8; + gp.param = I915_PARAM_NUM_FENCES_AVAIL; + gp.value = &i; + (void)drmIoctl(fd, DRM_IOCTL_I915_GETPARAM, &gp); + kgem->fence_max = i - 2; + + DBG(("%s: max fences=%d\n", __FUNCTION__, kgem->fence_max)); +} + +/* XXX hopefully a good approximation */ +static uint32_t kgem_get_unique_id(struct kgem *kgem) +{ + uint32_t id; + id = ++kgem->unique_id; + if (id == 0) + id = ++kgem->unique_id; + return id; +} + +static uint32_t kgem_surface_size(struct kgem *kgem, + uint32_t 
width, + uint32_t height, + uint32_t bpp, + uint32_t tiling, + uint32_t *pitch) +{ + uint32_t tile_width, tile_height; + uint32_t size; + + if (kgem->gen == 2) { + if (tiling) { + tile_width = 512; + tile_height = 16; + } else { + tile_width = 64; + tile_height = 2; + } + } else switch (tiling) { + default: + case I915_TILING_NONE: + tile_width = 64; + tile_height = 2; + break; + case I915_TILING_X: + tile_width = 512; + tile_height = 8; + break; + case I915_TILING_Y: + tile_width = 128; + tile_height = 32; + break; + } + + *pitch = ALIGN(width * bpp / 8, tile_width); + if (kgem->gen < 40 && tiling != I915_TILING_NONE) { + if (*pitch > 8192) + return 0; + for (size = tile_width; size < *pitch; size <<= 1) + ; + *pitch = size; + } + + size = *pitch * ALIGN(height, tile_height); + if (kgem->has_relaxed_fencing || tiling == I915_TILING_NONE) + return ALIGN(size, PAGE_SIZE); + + /* We need to allocate a pot fence region for a tiled buffer. */ + if (kgem->gen < 30) + tile_width = 512 * 1024; + else + tile_width = 1024 * 1024; + while (tile_width < size) + tile_width *= 2; + return tile_width; +} + +static uint32_t kgem_aligned_height(uint32_t height, uint32_t tiling) +{ + uint32_t tile_height; + + switch (tiling) { + default: + case I915_TILING_NONE: + tile_height = 2; + break; + case I915_TILING_X: + tile_height = 8; + break; + case I915_TILING_Y: + tile_height = 32; + break; + } + + return ALIGN(height, tile_height); +} + +static struct drm_i915_gem_exec_object2 * +kgem_add_handle(struct kgem *kgem, struct kgem_bo *bo) +{ + struct drm_i915_gem_exec_object2 *exec; + + assert(kgem->nexec < ARRAY_SIZE(kgem->exec)); + exec = memset(&kgem->exec[kgem->nexec++], 0, sizeof(*exec)); + exec->handle = bo->handle; + exec->offset = bo->presumed_offset; + + kgem->aperture += bo->aperture_size; + + return exec; +} + +void _kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo) +{ + bo->exec = kgem_add_handle(kgem, bo); + bo->rq = kgem->next_request; + list_move(&bo->request, 
&kgem->next_request->buffers); + kgem->flush |= bo->flush; +} + +static uint32_t kgem_end_batch(struct kgem *kgem) +{ + kgem->batch[kgem->nbatch++] = MI_BATCH_BUFFER_END; + if (kgem->nbatch & 1) + kgem->batch[kgem->nbatch++] = MI_NOOP; + + return kgem->nbatch; +} + +static void kgem_fixup_self_relocs(struct kgem *kgem, struct kgem_bo *bo) +{ + int n; + + for (n = 0; n < kgem->nreloc; n++) { + if (kgem->reloc[n].target_handle == 0) { + kgem->reloc[n].target_handle = bo->handle; + kgem->batch[kgem->reloc[n].offset/sizeof(kgem->batch[0])] = + kgem->reloc[n].delta + bo->presumed_offset; + } + } +} + +static void __kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) +{ + assert(list_is_empty(&bo->list)); + assert(bo->refcnt == 0); + + bo->src_bound = bo->dst_bound = 0; + + if(!bo->reusable) + goto destroy; + + if (!bo->deleted && !bo->exec) { + if (!gem_madvise(kgem->fd, bo->handle, I915_MADV_DONTNEED)) { + kgem->need_purge = 1; + goto destroy; + } + + bo->deleted = 1; + } + + list_move(&bo->list, (bo->rq || bo->needs_flush) ? 
&kgem->active : inactive(kgem, bo->size)); + return; + +destroy: + if (!bo->exec) { + list_del(&bo->request); + gem_close(kgem->fd, bo->handle); + free(bo); + } +} + +static void kgem_bo_unref(struct kgem *kgem, struct kgem_bo *bo) +{ + if (--bo->refcnt == 0) + __kgem_bo_destroy(kgem, bo); +} + +void _kgem_retire(struct kgem *kgem) +{ + struct kgem_bo *bo, *next; + + list_for_each_entry_safe(bo, next, &kgem->active, list) { + if (bo->rq == NULL && !kgem_busy(kgem, bo->handle)) { + assert(bo->needs_flush); + assert(bo->deleted); + bo->needs_flush = 0; + list_move(&bo->list, inactive(kgem, bo->size)); + } + } + + while (!list_is_empty(&kgem->requests)) { + struct kgem_request *rq; + + rq = list_first_entry(&kgem->requests, + struct kgem_request, + list); + if (kgem_busy(kgem, rq->bo->handle)) + break; + + while (!list_is_empty(&rq->buffers)) { + bo = list_first_entry(&rq->buffers, + struct kgem_bo, + request); + list_del(&bo->request); + bo->rq = NULL; + bo->gpu = false; + + if (bo->refcnt == 0 && !bo->needs_flush) { + assert(bo->deleted); + if (bo->reusable) { + list_move(&bo->list, + inactive(kgem, bo->size)); + } else { + gem_close(kgem->fd, bo->handle); + free(bo); + } + } + } + + rq->bo->refcnt--; + assert(rq->bo->refcnt == 0); + if (gem_madvise(kgem->fd, rq->bo->handle, I915_MADV_DONTNEED)) { + rq->bo->deleted = 1; + list_move(&rq->bo->list, + inactive(kgem, rq->bo->size)); + } else { + kgem->need_purge = 1; + gem_close(kgem->fd, rq->bo->handle); + free(rq->bo); + } + + list_del(&rq->list); + free(rq); + } + + kgem->retire = 0; +} + +static void kgem_commit(struct kgem *kgem) +{ + struct kgem_request *rq = kgem->next_request; + struct kgem_bo *bo, *next; + + list_for_each_entry_safe(bo, next, &rq->buffers, request) { + bo->src_bound = bo->dst_bound = 0; + bo->presumed_offset = bo->exec->offset; + bo->exec = NULL; + bo->dirty = false; + bo->gpu = true; + bo->cpu_read = false; + bo->cpu_write = false; + + if (!bo->refcnt) { + if (!bo->reusable) { +destroy: + 
list_del(&bo->list); + list_del(&bo->request); + gem_close(kgem->fd, bo->handle); + free(bo); + continue; + } + if (!bo->deleted) { + if (!gem_madvise(kgem->fd, bo->handle, + I915_MADV_DONTNEED)) { + kgem->need_purge = 1; + goto destroy; + } + bo->deleted = 1; + } + } + } + + list_add_tail(&rq->list, &kgem->requests); + kgem->next_request = __kgem_request_alloc(); +} + +static void kgem_close_list(struct kgem *kgem, struct list *head) +{ + while (!list_is_empty(head)) { + struct kgem_bo *bo; + + bo = list_first_entry(head, struct kgem_bo, list); + gem_close(kgem->fd, bo->handle); + list_del(&bo->list); + list_del(&bo->request); + free(bo); + } +} + +static void kgem_close_inactive(struct kgem *kgem) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) + kgem_close_list(kgem, &kgem->inactive[i]); +} + +static void kgem_finish_partials(struct kgem *kgem) +{ + struct kgem_partial_bo *bo, *next; + + list_for_each_entry_safe(bo, next, &kgem->partial, base.list) { + if (!bo->base.exec) + continue; + + if (bo->write && bo->need_io) { + DBG(("%s: handle=%d, uploading %d/%d\n", + __FUNCTION__, bo->base.handle, bo->used, bo->alloc)); + gem_write(kgem->fd, bo->base.handle, + 0, bo->used, bo+1); + bo->need_io = 0; + } + + list_del(&bo->base.list); + kgem_bo_unref(kgem, &bo->base); + } +} + +static void kgem_cleanup(struct kgem *kgem) +{ + while (!list_is_empty(&kgem->partial)) { + struct kgem_bo *bo; + + bo = list_first_entry(&kgem->partial, + struct kgem_bo, + list); + list_del(&bo->list); + kgem_bo_unref(kgem, bo); + } + + while (!list_is_empty(&kgem->requests)) { + struct kgem_request *rq; + + rq = list_first_entry(&kgem->requests, + struct kgem_request, + list); + while (!list_is_empty(&rq->buffers)) { + struct kgem_bo *bo; + + bo = list_first_entry(&rq->buffers, + struct kgem_bo, + request); + list_del(&bo->request); + bo->rq = NULL; + bo->gpu = false; + if (bo->refcnt == 0) { + list_del(&bo->list); + gem_close(kgem->fd, bo->handle); + free(bo); + } + } + + 
list_del(&rq->list); + free(rq); + } + + kgem_close_inactive(kgem); +} + +static int kgem_batch_write(struct kgem *kgem, uint32_t handle) +{ + int ret; + + /* If there is no surface data, just upload the batch */ + if (kgem->surface == ARRAY_SIZE(kgem->batch)) + return gem_write(kgem->fd, handle, + 0, sizeof(uint32_t)*kgem->nbatch, + kgem->batch); + + /* Are the batch pages conjoint with the surface pages? */ + if (kgem->surface < kgem->nbatch + PAGE_SIZE/4) + return gem_write(kgem->fd, handle, + 0, sizeof(kgem->batch), + kgem->batch); + + /* Disjoint surface/batch, upload separately */ + ret = gem_write(kgem->fd, handle, + 0, sizeof(uint32_t)*kgem->nbatch, + kgem->batch); + if (ret) + return ret; + + return gem_write(kgem->fd, handle, + sizeof(uint32_t)*kgem->surface, + sizeof(kgem->batch) - sizeof(uint32_t)*kgem->surface, + kgem->batch + kgem->surface); +} + +void _kgem_submit(struct kgem *kgem) +{ + struct kgem_request *rq; + uint32_t batch_end; + int size; + + assert(kgem->nbatch); + assert(kgem->nbatch <= KGEM_BATCH_SIZE(kgem)); + + sna_kgem_context_switch(kgem, KGEM_NONE); + + batch_end = kgem_end_batch(kgem); + sna_kgem_flush(kgem); + + DBG(("batch[%d/%d]: %d %d %d, nreloc=%d, nexec=%d, nfence=%d, aperture=%d\n", + kgem->mode, kgem->ring, batch_end, kgem->nbatch, kgem->surface, + kgem->nreloc, kgem->nexec, kgem->nfence, kgem->aperture)); + + assert(kgem->nbatch <= ARRAY_SIZE(kgem->batch)); + assert(kgem->nreloc <= ARRAY_SIZE(kgem->reloc)); + assert(kgem->nexec < ARRAY_SIZE(kgem->exec)); + assert(kgem->nfence <= kgem->fence_max); +#if DEBUG_BATCH + __kgem_batch_debug(kgem, batch_end); +#endif + + rq = kgem->next_request; + if (kgem->surface != ARRAY_SIZE(kgem->batch)) + size = sizeof(kgem->batch); + else + size = kgem->nbatch * sizeof(kgem->batch[0]); + rq->bo = kgem_create_linear(kgem, size); + if (rq->bo) { + uint32_t handle = rq->bo->handle; + int i; + + i = kgem->nexec++; + kgem->exec[i].handle = handle; + kgem->exec[i].relocation_count = kgem->nreloc; + 
kgem->exec[i].relocs_ptr = (uintptr_t)kgem->reloc; + kgem->exec[i].alignment = 0; + kgem->exec[i].offset = 0; + kgem->exec[i].flags = 0; + kgem->exec[i].rsvd1 = 0; + kgem->exec[i].rsvd2 = 0; + + rq->bo->exec = &kgem->exec[i]; + list_add(&rq->bo->request, &rq->buffers); + + kgem_fixup_self_relocs(kgem, rq->bo); + kgem_finish_partials(kgem); + + if (kgem_batch_write(kgem, handle) == 0) { + struct drm_i915_gem_execbuffer2 execbuf; + int ret; + + execbuf.buffers_ptr = (uintptr_t)kgem->exec; + execbuf.buffer_count = kgem->nexec; + execbuf.batch_start_offset = 0; + execbuf.batch_len = batch_end*4; + execbuf.cliprects_ptr = 0; + execbuf.num_cliprects = 0; + execbuf.DR1 = 0; + execbuf.DR4 = 0; + execbuf.flags = kgem->ring; + execbuf.rsvd1 = 0; + execbuf.rsvd2 = 0; + + if (DBG_DUMP) { + int fd = open("/tmp/i915-batchbuffers.dump", + O_WRONLY | O_CREAT | O_APPEND, + 0666); + if (fd != -1) { + ret = write(fd, kgem->batch, batch_end*4); + fd = close(fd); + } + } + + ret = drmIoctl(kgem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + &execbuf); + while (ret == -1 && errno == EBUSY) { + drmCommandNone(kgem->fd, DRM_I915_GEM_THROTTLE); + ret = drmIoctl(kgem->fd, + DRM_IOCTL_I915_GEM_EXECBUFFER2, + &execbuf); + } + if (ret == -1 && errno == EIO) { + DBG(("%s: GPU hang detected\n", __FUNCTION__)); + kgem->wedged = 1; + ret = 0; + } +#if DEBUG_KGEM + if (ret < 0) { + int i; + ErrorF("batch (end=%d, size=%d) submit failed: %d\n", + batch_end, size, errno); + + i = open("/tmp/batchbuffer", O_WRONLY | O_CREAT | O_APPEND, 0666); + if (i != -1) { + ret = write(i, kgem->batch, batch_end*4); + close(i); + } + + for (i = 0; i < kgem->nexec; i++) { + struct kgem_request *rq = kgem->next_request; + struct kgem_bo *bo, *found = NULL; + + list_for_each_entry(bo, &rq->buffers, request) { + if (bo->handle == kgem->exec[i].handle) { + found = bo; + break; + } + } + ErrorF("exec[%d] = handle:%d, presumed offset: %x, size: %d, tiling %d, fenced %d, deleted %d\n", + i, + kgem->exec[i].handle, + 
(int)kgem->exec[i].offset, + found ? found->size : 0, + found ? found->tiling : 0, + (int)(kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE), + found ? found->deleted : 1); + } + for (i = 0; i < kgem->nreloc; i++) { + ErrorF("reloc[%d] = pos:%d, target:%d, delta:%d, read:%x, write:%x, offset:%x\n", + i, + (int)kgem->reloc[i].offset, + kgem->reloc[i].target_handle, + kgem->reloc[i].delta, + kgem->reloc[i].read_domains, + kgem->reloc[i].write_domain, + (int)kgem->reloc[i].presumed_offset); + } + abort(); + } +#endif + assert(ret == 0); + + if (DEBUG_FLUSH_SYNC) { + struct drm_i915_gem_set_domain set_domain; + int ret; + + set_domain.handle = handle; + set_domain.read_domains = I915_GEM_DOMAIN_GTT; + set_domain.write_domain = I915_GEM_DOMAIN_GTT; + + ret = drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); + if (ret == -1) { + DBG(("%s: sync: GPU hang detected\n", __FUNCTION__)); + kgem->wedged = 1; + } + } + } + } + + kgem_commit(kgem); + if (kgem->wedged) + kgem_cleanup(kgem); + + kgem->nfence = 0; + kgem->nexec = 0; + kgem->nreloc = 0; + kgem->aperture = 0; + kgem->nbatch = 0; + kgem->surface = ARRAY_SIZE(kgem->batch); + kgem->mode = KGEM_NONE; + kgem->flush = 0; + + kgem->retire = 1; + + sna_kgem_reset(kgem); +} + +void kgem_throttle(struct kgem *kgem) +{ + kgem->wedged |= drmCommandNone(kgem->fd, DRM_I915_GEM_THROTTLE) == -EIO; +} + +bool kgem_needs_expire(struct kgem *kgem) +{ + int i; + + if (!list_is_empty(&kgem->active)) + return true; + + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { + if (!list_is_empty(&kgem->inactive[i])) + return true; + } + + return false; +} + +bool kgem_expire_cache(struct kgem *kgem) +{ + time_t now, expire; + struct kgem_bo *bo; + unsigned int size = 0, count = 0; + bool idle; + int i; + + _kgem_retire(kgem); + if (kgem->wedged) + kgem_cleanup(kgem); + + time(&now); + expire = 0; + + idle = true; + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { + idle &= list_is_empty(&kgem->inactive[i]); + list_for_each_entry(bo, 
&kgem->inactive[i], list) { + assert(bo->deleted); + if (bo->delta) { + expire = now - 5; + break; + } + + bo->delta = now; + } + } + if (!kgem->need_purge) { + if (idle) + return false; + if (expire == 0) + return true; + } + + idle = true; + for (i = 0; i < ARRAY_SIZE(kgem->inactive); i++) { + while (!list_is_empty(&kgem->inactive[i])) { + bo = list_last_entry(&kgem->inactive[i], + struct kgem_bo, list); + + if (!gem_madvise(kgem->fd, bo->handle, + I915_MADV_DONTNEED)) { + if (bo->delta > expire) { + idle = false; + break; + } + } + + count++; + size += bo->size; + + gem_close(kgem->fd, bo->handle); + list_del(&bo->list); + free(bo); + } + } + + DBG(("%s: purge? %d -- expired %d objects, %d bytes\n", __FUNCTION__, kgem->need_purge, count, size)); + + kgem->need_purge = false; + return idle; + (void)count; + (void)size; +} + +static struct kgem_bo * +search_linear_cache(struct kgem *kgem, int size, bool active) +{ + struct kgem_bo *bo, *next; + struct list *cache; + + if (!active) { + cache = inactive(kgem, size); + kgem_retire(kgem); + } else + cache = &kgem->active; + + list_for_each_entry_safe(bo, next, cache, list) { + if (size > bo->size) + continue; + + if (active && bo->tiling != I915_TILING_NONE) + continue; + + list_del(&bo->list); + + if (bo->deleted) { + if (!gem_madvise(kgem->fd, bo->handle, + I915_MADV_WILLNEED)) { + kgem->need_purge = 1; + goto next_bo; + } + + bo->deleted = 0; + } + + if (I915_TILING_NONE != bo->tiling && + gem_set_tiling(kgem->fd, bo->handle, + I915_TILING_NONE, 0) != I915_TILING_NONE) + goto next_bo; + + bo->tiling = I915_TILING_NONE; + bo->pitch = 0; + bo->delta = 0; + bo->aperture_size = bo->size; + DBG((" %s: found handle=%d (size=%d) in linear %s cache\n", + __FUNCTION__, bo->handle, bo->size, + active ? 
"active" : "inactive")); + assert(bo->refcnt == 0); + assert(bo->reusable); + return bo; +next_bo: + list_del(&bo->request); + gem_close(kgem->fd, bo->handle); + free(bo); + } + + return NULL; +} + +struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name) +{ + struct drm_gem_open open_arg; + + DBG(("%s(name=%d)\n", __FUNCTION__, name)); + + memset(&open_arg, 0, sizeof(open_arg)); + open_arg.name = name; + if (drmIoctl(kgem->fd, DRM_IOCTL_GEM_OPEN, &open_arg)) + return NULL; + + DBG(("%s: new handle=%d\n", __FUNCTION__, open_arg.handle)); + return __kgem_bo_alloc(open_arg.handle, 0); +} + +struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size) +{ + struct kgem_bo *bo; + uint32_t handle; + + DBG(("%s(%d)\n", __FUNCTION__, size)); + + size = ALIGN(size, PAGE_SIZE); + bo = search_linear_cache(kgem, size, false); + if (bo) + return kgem_bo_reference(bo); + + handle = gem_create(kgem->fd, size); + if (handle == 0) + return NULL; + + DBG(("%s: new handle=%d\n", __FUNCTION__, handle)); + return __kgem_bo_alloc(handle, size); +} + +int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int bpp) +{ + if (kgem->gen < 40) { + if (tiling) { + if (width * bpp > 8192 * 8) { + DBG(("%s: pitch too large for tliing [%d]\n", + __FUNCTION__, width*bpp/8)); + return I915_TILING_NONE; + } + + if (width > 2048 || height > 2048) { + DBG(("%s: large buffer (%dx%d), forcing TILING_X\n", + __FUNCTION__, width, height)); + return -I915_TILING_X; + } + } + } else { + if (width*bpp > (MAXSHORT-512) * 8) { + DBG(("%s: large pitch [%d], forcing TILING_X\n", + __FUNCTION__, width*bpp/8)); + return -I915_TILING_X; + } + + if (tiling && (width > 8192 || height > 8192)) { + DBG(("%s: large tiled buffer [%dx%d], forcing TILING_X\n", + __FUNCTION__, width, height)); + return -I915_TILING_X; + } + } + + if (tiling == I915_TILING_Y && height < 16) { + DBG(("%s: too short [%d] for TILING_Y\n", + __FUNCTION__,height)); + tiling = I915_TILING_X; + } + if (tiling 
== I915_TILING_X && height < 4) { + DBG(("%s: too short [%d] for TILING_X\n", + __FUNCTION__, height)); + tiling = I915_TILING_NONE; + } + + if (tiling == I915_TILING_X && width * bpp < 512/2) { + DBG(("%s: too thin [%d] for TILING_X\n", + __FUNCTION__, width)); + tiling = I915_TILING_NONE; + } + if (tiling == I915_TILING_Y && width * bpp < 32/2) { + DBG(("%s: too thin [%d] for TILING_Y\n", + __FUNCTION__, width)); + tiling = I915_TILING_NONE; + } + + DBG(("%s: %dx%d -> %d\n", __FUNCTION__, width, height, tiling)); + return tiling; +} + +static bool _kgem_can_create_2d(struct kgem *kgem, + int width, int height, int bpp, int tiling) +{ + uint32_t pitch, size; + + if (bpp < 8) + return false; + + size = kgem_surface_size(kgem, width, height, bpp, tiling, &pitch); + if (size == 0 || size > kgem->aperture_low) + size = kgem_surface_size(kgem, width, height, bpp, I915_TILING_NONE, &pitch); + return size > 0 && size <= kgem->aperture_low; +} + +#if DEBUG_KGEM +bool kgem_can_create_2d(struct kgem *kgem, + int width, int height, int bpp, int tiling) +{ + bool ret = _kgem_can_create_2d(kgem, width, height, bpp, tiling); + DBG(("%s(%dx%d, bpp=%d, tiling=%d) = %d\n", __FUNCTION__, + width, height, bpp, tiling, ret)); + return ret; +} +#else +bool kgem_can_create_2d(struct kgem *kgem, + int width, int height, int bpp, int tiling) +{ + return _kgem_can_create_2d(kgem, width, height, bpp, tiling); +} +#endif + +static int kgem_bo_aperture_size(struct kgem *kgem, struct kgem_bo *bo) +{ + int size; + + if (kgem->gen >= 40 || bo->tiling == I915_TILING_NONE) { + size = bo->size; + } else { + if (kgem->gen < 30) + size = 512 * 1024; + else + size = 1024 * 1024; + while (size < bo->size) + size *= 2; + } + return size; +} + +struct kgem_bo *kgem_create_2d(struct kgem *kgem, + int width, + int height, + int bpp, + int tiling, + uint32_t flags) +{ + struct list *cache; + struct kgem_bo *bo, *next; + uint32_t pitch, tiled_height[3], size; + uint32_t handle; + int exact = flags & 
CREATE_EXACT; + int search; + int i; + + if (tiling < 0) + tiling = -tiling, exact = 1; + + DBG(("%s(%dx%d, bpp=%d, tiling=%d, exact=%d, inactive=%d)\n", __FUNCTION__, + width, height, bpp, tiling, !!exact, !!(flags & CREATE_INACTIVE))); + + assert(_kgem_can_create_2d(kgem, width, height, bpp, tiling)); + size = kgem_surface_size(kgem, width, height, bpp, tiling, &pitch); + assert(size && size <= kgem->aperture_low); + if (flags & CREATE_INACTIVE) + goto skip_active_search; + + for (i = 0; i <= I915_TILING_Y; i++) + tiled_height[i] = kgem_aligned_height(height, i); + + search = 0; + /* Best active match first */ + list_for_each_entry_safe(bo, next, &kgem->active, list) { + uint32_t s; + + search++; + + if (exact) { + if (bo->tiling != tiling) + continue; + } else { + if (bo->tiling > tiling) + continue; + } + + if (bo->tiling) { + if (bo->pitch < pitch) { + DBG(("tiled and pitch too small: tiling=%d, (want %d), pitch=%d, need %d\n", + bo->tiling, tiling, + bo->pitch, pitch)); + continue; + } + } else + bo->pitch = pitch; + + s = bo->pitch * tiled_height[bo->tiling]; + if (s > bo->size) { + DBG(("size too small: %d < %d\n", + bo->size, s)); + continue; + } + + list_del(&bo->list); + + if (bo->deleted) { + if (!gem_madvise(kgem->fd, bo->handle, + I915_MADV_WILLNEED)) { + kgem->need_purge = 1; + gem_close(kgem->fd, bo->handle); + list_del(&bo->request); + free(bo); + continue; + } + + bo->deleted = 0; + } + + bo->unique_id = kgem_get_unique_id(kgem); + bo->delta = 0; + bo->aperture_size = kgem_bo_aperture_size(kgem, bo); + DBG((" from active: pitch=%d, tiling=%d, handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); + assert(bo->refcnt == 0); + assert(bo->reusable); + return kgem_bo_reference(bo); + } + + DBG(("searched %d active, no match\n", search)); + +skip_active_search: + /* Now just look for a close match and prefer any currently active */ + cache = inactive(kgem, size); + list_for_each_entry_safe(bo, next, cache, list) { + if (size > 
bo->size) { + DBG(("inactive too small: %d < %d\n", + bo->size, size)); + continue; + } + + if (bo->tiling != tiling || + (tiling != I915_TILING_NONE && bo->pitch != pitch)) { + if (tiling != gem_set_tiling(kgem->fd, + bo->handle, + tiling, pitch)) + goto next_bo; + } + + bo->pitch = pitch; + bo->tiling = tiling; + + list_del(&bo->list); + + if (bo->deleted) { + if (!gem_madvise(kgem->fd, bo->handle, + I915_MADV_WILLNEED)) { + kgem->need_purge = 1; + goto next_bo; + } + + bo->deleted = 0; + } + + bo->delta = 0; + bo->unique_id = kgem_get_unique_id(kgem); + bo->aperture_size = kgem_bo_aperture_size(kgem, bo); + assert(bo->pitch); + DBG((" from inactive: pitch=%d, tiling=%d: handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); + assert(bo->refcnt == 0); + assert(bo->reusable); + return kgem_bo_reference(bo); + +next_bo: + gem_close(kgem->fd, bo->handle); + list_del(&bo->request); + free(bo); + continue; + } + + handle = gem_create(kgem->fd, size); + if (handle == 0) + return NULL; + + bo = __kgem_bo_alloc(handle, size); + if (!bo) { + gem_close(kgem->fd, handle); + return NULL; + } + + bo->unique_id = kgem_get_unique_id(kgem); + bo->pitch = pitch; + if (tiling != I915_TILING_NONE) + bo->tiling = gem_set_tiling(kgem->fd, handle, tiling, pitch); + bo->aperture_size = kgem_bo_aperture_size(kgem, bo); + + DBG((" new pitch=%d, tiling=%d, handle=%d, id=%d\n", + bo->pitch, bo->tiling, bo->handle, bo->unique_id)); + return bo; +} + +void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) +{ + if (bo->proxy) { + kgem_bo_unref(kgem, bo->proxy); + list_del(&bo->request); + free(bo); + return; + } + + __kgem_bo_destroy(kgem, bo); +} + +void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo) +{ + /* The kernel will emit a flush *and* update its own flushing lists. 
*/ + kgem_busy(kgem, bo->handle); +} + +bool kgem_check_bo(struct kgem *kgem, struct kgem_bo *bo) +{ + if (bo == NULL) + return true; + + if (bo->exec) + return true; + + if (kgem->aperture > kgem->aperture_low) + return false; + + if (bo->size + kgem->aperture > kgem->aperture_high) + return false; + + if (kgem->nexec == KGEM_EXEC_SIZE(kgem)) + return false; + + return true; +} + +bool kgem_check_bo_fenced(struct kgem *kgem, ...) +{ + va_list ap; + struct kgem_bo *bo; + int num_fence = 0; + int num_exec = 0; + int size = 0; + + if (kgem->aperture > kgem->aperture_low) + return false; + + va_start(ap, kgem); + while ((bo = va_arg(ap, struct kgem_bo *))) { + if (bo->exec) { + if (kgem->gen >= 40 || bo->tiling == I915_TILING_NONE) + continue; + + if ((bo->exec->flags & EXEC_OBJECT_NEEDS_FENCE) == 0) + num_fence++; + + continue; + } + + size += bo->size; + num_exec++; + if (kgem->gen < 40 && bo->tiling) + num_fence++; + } + va_end(ap); + + if (size + kgem->aperture > kgem->aperture_high) + return false; + + if (kgem->nexec + num_exec >= KGEM_EXEC_SIZE(kgem)) + return false; + + if (kgem->nfence + num_fence >= kgem->fence_max) + return false; + + return true; +} + +uint32_t kgem_add_reloc(struct kgem *kgem, + uint32_t pos, + struct kgem_bo *bo, + uint32_t read_write_domain, + uint32_t delta) +{ + int index; + + index = kgem->nreloc++; + assert(index < ARRAY_SIZE(kgem->reloc)); + kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]); + if (bo) { + assert(!bo->deleted); + + delta += bo->delta; + if (bo->proxy) { + /* need to release the cache upon batch submit */ + list_move(&bo->request, &kgem->next_request->buffers); + bo->exec = &_kgem_dummy_exec; + bo = bo->proxy; + } + + assert(!bo->deleted); + + if (bo->exec == NULL) { + _kgem_add_bo(kgem, bo); + if (bo->needs_flush && + (read_write_domain >> 16) != I915_GEM_DOMAIN_RENDER) + bo->needs_flush = false; + } + + if (read_write_domain & KGEM_RELOC_FENCED && kgem->gen < 40) { + if (bo->tiling && + (bo->exec->flags & 
EXEC_OBJECT_NEEDS_FENCE) == 0) { + assert(kgem->nfence < kgem->fence_max); + kgem->nfence++; + } + bo->exec->flags |= EXEC_OBJECT_NEEDS_FENCE; + } + + kgem->reloc[index].delta = delta; + kgem->reloc[index].target_handle = bo->handle; + kgem->reloc[index].presumed_offset = bo->presumed_offset; + + if (read_write_domain & 0x7fff) + bo->needs_flush = bo->dirty = true; + + delta += bo->presumed_offset; + } else { + kgem->reloc[index].delta = delta; + kgem->reloc[index].target_handle = 0; + kgem->reloc[index].presumed_offset = 0; + } + kgem->reloc[index].read_domains = read_write_domain >> 16; + kgem->reloc[index].write_domain = read_write_domain & 0x7fff; + + return delta; +} + +void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot) +{ + return gem_mmap(kgem->fd, bo->handle, bo->size, prot); +} + +uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo) +{ + struct drm_gem_flink flink; + int ret; + + memset(&flink, 0, sizeof(flink)); + flink.handle = bo->handle; + ret = drmIoctl(kgem->fd, DRM_IOCTL_GEM_FLINK, &flink); + if (ret) + return 0; + + bo->reusable = false; + return flink.name; +} + +#if defined(USE_VMAP) && defined(I915_PARAM_HAS_VMAP) +static uint32_t gem_vmap(int fd, void *ptr, int size, int read_only) +{ + struct drm_i915_gem_vmap vmap; + + vmap.user_ptr = (uintptr_t)ptr; + vmap.user_size = size; + vmap.flags = 0; + if (read_only) + vmap.flags |= I915_VMAP_READ_ONLY; + + if (drmIoctl(fd, DRM_IOCTL_I915_GEM_VMAP, &vmap)) + return 0; + + return vmap.handle; +} + +struct kgem_bo *kgem_create_map(struct kgem *kgem, + void *ptr, uint32_t size, + bool read_only) +{ + struct kgem_bo *bo; + uint32_t handle; + + if (!kgem->has_vmap) + return NULL; + + handle = gem_vmap(kgem->fd, ptr, size, read_only); + if (handle == 0) + return NULL; + + bo = __kgem_bo_alloc(handle, size); + if (bo == NULL) { + gem_close(kgem->fd, handle); + return NULL; + } + + bo->reusable = false; + bo->sync = true; + DBG(("%s(ptr=%p, size=%d, read_only=%d) => handle=%d\n", + 
__FUNCTION__, ptr, size, read_only, handle)); + return bo; +} +#else +static uint32_t gem_vmap(int fd, void *ptr, int size, int read_only) +{ + return 0; +} + +struct kgem_bo *kgem_create_map(struct kgem *kgem, + void *ptr, uint32_t size, + bool read_only) +{ + return NULL; +} +#endif + +void kgem_bo_sync(struct kgem *kgem, struct kgem_bo *bo, bool for_write) +{ + struct drm_i915_gem_set_domain set_domain; + + kgem_bo_submit(kgem, bo); + if (for_write ? bo->cpu_write : bo->cpu_read) + return; + + set_domain.handle = bo->handle; + set_domain.read_domains = I915_GEM_DOMAIN_CPU; + set_domain.write_domain = for_write ? I915_GEM_DOMAIN_CPU : 0; + + drmIoctl(kgem->fd, DRM_IOCTL_I915_GEM_SET_DOMAIN, &set_domain); + _kgem_retire(kgem); + bo->cpu_read = true; + if (for_write) + bo->cpu_write = true; +} + +void kgem_clear_dirty(struct kgem *kgem) +{ + struct kgem_request *rq = kgem->next_request; + struct kgem_bo *bo; + + list_for_each_entry(bo, &rq->buffers, request) + bo->dirty = false; +} + +/* Flush the contents of the RenderCache and invalidate the TextureCache */ +void kgem_emit_flush(struct kgem *kgem) +{ + if (kgem->nbatch == 0) + return; + + if (!kgem_check_batch(kgem, 4)) { + _kgem_submit(kgem); + return; + } + + DBG(("%s()\n", __FUNCTION__)); + + if (kgem->ring == KGEM_BLT) { + kgem->batch[kgem->nbatch++] = MI_FLUSH_DW | 2; + kgem->batch[kgem->nbatch++] = 0; + kgem->batch[kgem->nbatch++] = 0; + kgem->batch[kgem->nbatch++] = 0; + } else if (kgem->gen >= 50 && 0) { + kgem->batch[kgem->nbatch++] = PIPE_CONTROL | 2; + kgem->batch[kgem->nbatch++] = + PIPE_CONTROL_WC_FLUSH | + PIPE_CONTROL_TC_FLUSH | + PIPE_CONTROL_NOWRITE; + kgem->batch[kgem->nbatch++] = 0; + kgem->batch[kgem->nbatch++] = 0; + } else { + if ((kgem->batch[kgem->nbatch-1] & (0xff<<23)) == MI_FLUSH) + kgem->nbatch--; + kgem->batch[kgem->nbatch++] = MI_FLUSH | MI_INVALIDATE_MAP_CACHE; + } + + kgem_clear_dirty(kgem); +} + +struct kgem_bo *kgem_create_proxy(struct kgem_bo *target, + int offset, int length) 
+{ + struct kgem_bo *bo; + + assert(target->proxy == NULL); + + bo = __kgem_bo_alloc(target->handle, length); + if (bo == NULL) + return NULL; + + bo->reusable = false; + bo->proxy = kgem_bo_reference(target); + bo->delta = offset; + return bo; +} + +struct kgem_bo *kgem_create_buffer(struct kgem *kgem, + uint32_t size, uint32_t flags, + void **ret) +{ + struct kgem_partial_bo *bo; + bool write = !!(flags & KGEM_BUFFER_WRITE); + int offset = 0; + + DBG(("%s: size=%d, flags=%x\n", __FUNCTION__, size, flags)); + + list_for_each_entry(bo, &kgem->partial, base.list) { + if (bo->write != write) + continue; + if (bo->used + size < bo->alloc) { + DBG(("%s: reusing partial buffer? used=%d, total=%d\n", + __FUNCTION__, bo->used, bo->alloc)); + offset = bo->used; + bo->used += size; + break; + } + } + + if (offset == 0) { + uint32_t handle; + int alloc; + + alloc = (flags & KGEM_BUFFER_LAST) ? 4096 : 32 * 1024; + alloc = ALIGN(size, alloc); + + bo = malloc(sizeof(*bo) + alloc); + if (bo == NULL) + return NULL; + + handle = 0; + if (kgem->has_vmap) + handle = gem_vmap(kgem->fd, bo+1, alloc, write); + if (handle == 0) { + struct kgem_bo *old; + + old = NULL; + if (!write) + old = search_linear_cache(kgem, alloc, true); + if (old == NULL) + old = search_linear_cache(kgem, alloc, false); + if (old) { + memcpy(&bo->base, old, sizeof(*old)); + if (old->rq) + list_replace(&old->request, + &bo->base.request); + else + list_init(&bo->base.request); + free(old); + bo->base.refcnt = 1; + } else { + if (!__kgem_bo_init(&bo->base, + gem_create(kgem->fd, alloc), + alloc)) { + free(bo); + return NULL; + } + } + bo->need_io = true; + } else { + __kgem_bo_init(&bo->base, handle, alloc); + bo->base.reusable = false; + bo->base.sync = true; + bo->need_io = 0; + } + + bo->alloc = alloc; + bo->used = size; + bo->write = write; + + list_add(&bo->base.list, &kgem->partial); + DBG(("%s(size=%d) new handle=%d\n", + __FUNCTION__, alloc, bo->base.handle)); + } + + *ret = (char *)(bo+1) + offset; + 
return kgem_create_proxy(&bo->base, offset, size); +} + +struct kgem_bo *kgem_upload_source_image(struct kgem *kgem, + const void *data, + int x, int y, + int width, int height, + int stride, int bpp) +{ + int dst_stride = ALIGN(width * bpp, 32) >> 3; + int size = dst_stride * height; + struct kgem_bo *bo; + void *dst; + + DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n", + __FUNCTION__, x, y, width, height, stride, bpp)); + + bo = kgem_create_buffer(kgem, size, KGEM_BUFFER_WRITE, &dst); + if (bo == NULL) + return NULL; + + memcpy_blt(data, dst, bpp, + stride, dst_stride, + x, y, + 0, 0, + width, height); + + bo->pitch = dst_stride; + return bo; +} + +void kgem_buffer_sync(struct kgem *kgem, struct kgem_bo *_bo) +{ + struct kgem_partial_bo *bo; + + if (_bo->proxy) + _bo = _bo->proxy; + + bo = (struct kgem_partial_bo *)_bo; + + DBG(("%s(need_io=%s, sync=%d)\n", __FUNCTION__, + bo->need_io ? bo->write ? "write" : "read" : "none", + bo->base.sync)); + + if (bo->need_io) { + if (bo->write) + gem_write(kgem->fd, bo->base.handle, + 0, bo->used, bo+1); + else + gem_read(kgem->fd, bo->base.handle, bo+1, bo->used); + _kgem_retire(kgem); + bo->need_io = 0; + } + + if (bo->base.sync) + kgem_bo_sync(kgem, &bo->base, bo->write); +} diff --git a/src/sna/kgem.h b/src/sna/kgem.h new file mode 100644 index 00000000..37803bdf --- /dev/null +++ b/src/sna/kgem.h @@ -0,0 +1,332 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be 
included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#include <stdint.h> +#include <stdbool.h> +#include <stdarg.h> + +#include <i915_drm.h> + +#ifndef KGEM_H +#define KGEM_H + +struct kgem_bo { + struct kgem_bo *proxy; + + struct list list; + struct list request; + + struct kgem_request *rq; + struct drm_i915_gem_exec_object2 *exec; + + uint16_t src_bound, dst_bound; + uint32_t unique_id; + uint32_t refcnt; + uint32_t handle; + uint32_t presumed_offset; + uint32_t size; + uint32_t aperture_size; + uint32_t delta; + + uint32_t pitch : 16; + uint32_t tiling : 2; + uint32_t reusable : 1; + uint32_t dirty : 1; + uint32_t gpu : 1; + uint32_t needs_flush : 1; + uint32_t cpu_read : 1; + uint32_t cpu_write : 1; + uint32_t flush : 1; + uint32_t sync : 1; + uint32_t deleted : 1; +}; + +struct kgem_request { + struct list list; + struct kgem_bo *bo; + struct list buffers; +}; + +struct kgem { + int fd; + int wedged; + int gen; + + uint32_t unique_id; + + enum kgem_mode { + /* order matches I915_EXEC_RING ordering */ + KGEM_NONE = 0, + KGEM_RENDER, + KGEM_BSD, + KGEM_BLT, + } mode, ring; + + struct list active; + struct list inactive[16]; + struct list partial; + struct list requests; + struct kgem_request *next_request; + + uint16_t nbatch; + uint16_t surface; + uint16_t nexec; + uint16_t nreloc; + uint16_t nfence; + + uint32_t retire; + uint32_t flush; + uint32_t need_purge; + + uint32_t has_vmap :1; + 
uint32_t has_relaxed_fencing :1; + + uint16_t fence_max; + uint32_t aperture_high, aperture_low, aperture; + + uint32_t batch[4*1024]; + struct drm_i915_gem_exec_object2 exec[256]; + struct drm_i915_gem_relocation_entry reloc[384]; +}; + +#define KGEM_BATCH_RESERVED 2 +#define KGEM_RELOC_RESERVED 4 +#define KGEM_EXEC_RESERVED 1 + +#define ARRAY_SIZE(a) (sizeof(a)/sizeof((a)[0])) +#define KGEM_BATCH_SIZE(K) (ARRAY_SIZE((K)->batch)-KGEM_BATCH_RESERVED) +#define KGEM_EXEC_SIZE(K) (ARRAY_SIZE((K)->exec)-KGEM_EXEC_RESERVED) +#define KGEM_RELOC_SIZE(K) (ARRAY_SIZE((K)->reloc)-KGEM_RELOC_RESERVED) + +void kgem_init(struct kgem *kgem, int fd, int gen); + +struct kgem_bo *kgem_create_map(struct kgem *kgem, + void *ptr, uint32_t size, + bool read_only); + +struct kgem_bo *kgem_create_for_name(struct kgem *kgem, uint32_t name); + +struct kgem_bo *kgem_create_linear(struct kgem *kgem, int size); +struct kgem_bo *kgem_create_proxy(struct kgem_bo *target, + int offset, int length); + +struct kgem_bo *kgem_upload_source_image(struct kgem *kgem, + const void *data, + int x, int y, + int width, int height, + int stride, int bpp); + +int kgem_choose_tiling(struct kgem *kgem, + int tiling, int width, int height, int bpp); +bool kgem_can_create_2d(struct kgem *kgem, + int width, int height, int bpp, int tiling); +enum { + CREATE_EXACT = 0x1, + CREATE_INACTIVE = 0x2, +}; +struct kgem_bo *kgem_create_2d(struct kgem *kgem, + int width, + int height, + int bpp, + int tiling, + uint32_t flags); + +void _kgem_retire(struct kgem *kgem); +static inline void kgem_retire(struct kgem *kgem) +{ + if (kgem->retire) + _kgem_retire(kgem); +} + +void _kgem_submit(struct kgem *kgem); +static inline void kgem_submit(struct kgem *kgem) +{ + if (kgem->nbatch) + _kgem_submit(kgem); +} + +static inline void kgem_bo_submit(struct kgem *kgem, struct kgem_bo *bo) +{ + if (bo->exec) + _kgem_submit(kgem); +} + +void __kgem_flush(struct kgem *kgem, struct kgem_bo *bo); +static inline void kgem_bo_flush(struct 
kgem *kgem, struct kgem_bo *bo) +{ + kgem_bo_submit(kgem, bo); + __kgem_flush(kgem, bo); +} + +static inline struct kgem_bo *kgem_bo_reference(struct kgem_bo *bo) +{ + bo->refcnt++; + return bo; +} + +void _kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo); +static inline void kgem_bo_destroy(struct kgem *kgem, struct kgem_bo *bo) +{ + assert(bo->refcnt); + if (--bo->refcnt == 0) + _kgem_bo_destroy(kgem, bo); +} + +void kgem_emit_flush(struct kgem *kgem); +void kgem_clear_dirty(struct kgem *kgem); + +extern void sna_kgem_context_switch(struct kgem *kgem, int new_mode); +static inline void kgem_set_mode(struct kgem *kgem, enum kgem_mode mode) +{ +#if DEBUG_FLUSH_CACHE + kgem_emit_flush(kgem); +#endif + +#if DEBUG_FLUSH_BATCH + kgem_submit(kgem); +#endif + + if (kgem->mode == mode) + return; + + sna_kgem_context_switch(kgem, mode); + + kgem->mode = mode; +} + +static inline void _kgem_set_mode(struct kgem *kgem, enum kgem_mode mode) +{ + if (kgem->nbatch) + kgem->mode = mode; +} + +static inline bool kgem_check_batch(struct kgem *kgem, int num_dwords) +{ + return kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED <= kgem->surface; +} + +static inline bool kgem_check_reloc(struct kgem *kgem, int num_reloc) +{ + return kgem->nreloc + num_reloc <= KGEM_RELOC_SIZE(kgem); +} + +static inline bool kgem_check_batch_with_surfaces(struct kgem *kgem, + int num_dwords, + int num_surfaces) +{ + return (int)(kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED) <= (int)(kgem->surface - num_surfaces*8) && + kgem_check_reloc(kgem, num_surfaces); +} + +static inline uint32_t *kgem_get_batch(struct kgem *kgem, int num_dwords) +{ + if (!kgem_check_batch(kgem, num_dwords)) + _kgem_submit(kgem); + + return kgem->batch + kgem->nbatch; +} + +static inline void kgem_advance_batch(struct kgem *kgem, int num_dwords) +{ + kgem->nbatch += num_dwords; +} + +bool kgem_check_bo(struct kgem *kgem, struct kgem_bo *bo); +bool kgem_check_bo_fenced(struct kgem *kgem, ...) 
__attribute__((sentinel(NULL))); + +void _kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo); +static inline void kgem_add_bo(struct kgem *kgem, struct kgem_bo *bo) +{ + if (bo->proxy) + bo = bo->proxy; + + if (bo->exec == NULL) + _kgem_add_bo(kgem, bo); +} + +#define KGEM_RELOC_FENCED 0x8000 +uint32_t kgem_add_reloc(struct kgem *kgem, + uint32_t pos, + struct kgem_bo *bo, + uint32_t read_write_domains, + uint32_t delta); + +void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo, int prot); +uint32_t kgem_bo_flink(struct kgem *kgem, struct kgem_bo *bo); + +Bool kgem_bo_write(struct kgem *kgem, + struct kgem_bo *bo, + const void *data, + int length); + +static inline bool kgem_bo_is_busy(struct kgem *kgem, struct kgem_bo *bo) +{ + if (bo->exec) + return true; + if (!bo->gpu) + return false; + + kgem_retire(kgem); + return bo->rq != NULL; +} + +static inline bool kgem_bo_is_dirty(struct kgem_bo *bo) +{ + if (bo == NULL) + return FALSE; + + if (bo->proxy) + bo = bo->proxy; + return bo->dirty; +} +static inline void kgem_bo_mark_dirty(struct kgem_bo *bo) +{ + if (bo->proxy) + bo = bo->proxy; + bo->dirty = true; +} + +void kgem_bo_sync(struct kgem *kgem, struct kgem_bo *bo, bool for_write); + +#define KGEM_BUFFER_WRITE 0x1 +#define KGEM_BUFFER_LAST 0x2 +struct kgem_bo *kgem_create_buffer(struct kgem *kgem, + uint32_t size, uint32_t flags, + void **ret); +void kgem_buffer_sync(struct kgem *kgem, struct kgem_bo *bo); + +void kgem_throttle(struct kgem *kgem); +bool kgem_needs_expire(struct kgem *kgem); +bool kgem_expire_cache(struct kgem *kgem); + +#if HAS_EXTRA_DEBUG +void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch); +#else +static inline void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch) {} +#endif + +#endif /* KGEM_H */ diff --git a/src/sna/kgem_debug.c b/src/sna/kgem_debug.c new file mode 100644 index 00000000..0dcd7065 --- /dev/null +++ b/src/sna/kgem_debug.c @@ -0,0 +1,390 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is 
hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/mman.h> +#include <assert.h> + +#include "sna.h" +#include "sna_reg.h" + +#include "kgem_debug.h" + +struct drm_i915_gem_relocation_entry * +kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset) +{ + int i; + + offset *= sizeof(uint32_t); + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == offset) + return kgem->reloc+i; + + return NULL; +} + +struct kgem_bo * +kgem_debug_get_bo_for_reloc_entry(struct kgem *kgem, + struct drm_i915_gem_relocation_entry *reloc) +{ + struct kgem_bo *bo; + + if (reloc == NULL) + return NULL; + + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == reloc->target_handle && bo->proxy == NULL) + break; + + assert(&bo->request != &kgem->next_request->buffers); + + return bo; +} + +static int kgem_debug_handle_is_fenced(struct kgem *kgem, uint32_t handle) +{ + int i; + + for (i = 0; i < kgem->nexec; i++) + if (kgem->exec[i].handle == handle) + return kgem->exec[i].flags & EXEC_OBJECT_NEEDS_FENCE; + + return 0; +} + +void +kgem_debug_print(const uint32_t *data, + uint32_t offset, unsigned int index, + char *fmt, ...) +{ + va_list va; + char buf[240]; + int len; + + len = snprintf(buf, sizeof(buf), + "0x%08x: 0x%08x: %s", + (offset + index) * 4, + data[index], + index == 0 ? 
"" : " "); + + va_start(va, fmt); + vsnprintf(buf + len, sizeof(buf) - len, fmt, va); + va_end(va); + + ErrorF("%s", buf); +} + +static int +decode_nop(struct kgem *kgem, uint32_t offset) +{ + uint32_t *data = kgem->batch + offset; + kgem_debug_print(data, offset, 0, "UNKNOWN\n"); + assert(0); + return 1; +} + +static int +decode_mi(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int len_mask; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x08, 0, 1, 1, "MI_ARB_ON_OFF" }, + { 0x0a, 0, 1, 1, "MI_BATCH_BUFFER_END" }, + { 0x30, 0x3f, 3, 3, "MI_BATCH_BUFFER" }, + { 0x31, 0x3f, 2, 2, "MI_BATCH_BUFFER_START" }, + { 0x14, 0x3f, 3, 3, "MI_DISPLAY_BUFFER_INFO" }, + { 0x04, 0, 1, 1, "MI_FLUSH" }, + { 0x22, 0x1f, 3, 3, "MI_LOAD_REGISTER_IMM" }, + { 0x13, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_EXCL" }, + { 0x12, 0x3f, 2, 2, "MI_LOAD_SCAN_LINES_INCL" }, + { 0x00, 0, 1, 1, "MI_NOOP" }, + { 0x11, 0x3f, 2, 2, "MI_OVERLAY_FLIP" }, + { 0x07, 0, 1, 1, "MI_REPORT_HEAD" }, + { 0x18, 0x3f, 2, 2, "MI_SET_CONTEXT" }, + { 0x20, 0x3f, 3, 4, "MI_STORE_DATA_IMM" }, + { 0x21, 0x3f, 3, 4, "MI_STORE_DATA_INDEX" }, + { 0x24, 0x3f, 3, 3, "MI_STORE_REGISTER_MEM" }, + { 0x02, 0, 1, 1, "MI_USER_INTERRUPT" }, + { 0x03, 0, 1, 1, "MI_WAIT_FOR_EVENT" }, + { 0x16, 0x7f, 3, 3, "MI_SEMAPHORE_MBOX" }, + { 0x26, 0x1f, 3, 4, "MI_FLUSH_DW" }, + { 0x0b, 0, 1, 1, "MI_SUSPEND_FLUSH" }, + }; + uint32_t *data = kgem->batch + offset; + int op; + + for (op = 0; op < ARRAY_SIZE(opcodes); op++) { + if ((data[0] & 0x1f800000) >> 23 == opcodes[op].opcode) { + unsigned int len = 1, i; + + kgem_debug_print(data, offset, 0, "%s\n", opcodes[op].name); + if (opcodes[op].max_len > 1) { + len = (data[0] & opcodes[op].len_mask) + 2; + if (len < opcodes[op].min_len || + len > opcodes[op].max_len) + { + ErrorF("Bad length (%d) in %s, [%d, %d]\n", + len, opcodes[op].name, + opcodes[op].min_len, + opcodes[op].max_len); + assert(0); + } + } + + for (i = 1; i < len; i++) + 
kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; + } + } + + kgem_debug_print(data, offset, 0, "MI UNKNOWN\n"); + assert(0); + return 1; +} + +static int +decode_2d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x40, 5, 5, "COLOR_BLT" }, + { 0x43, 6, 6, "SRC_COPY_BLT" }, + { 0x01, 8, 8, "XY_SETUP_BLT" }, + { 0x11, 9, 9, "XY_SETUP_MONO_PATTERN_SL_BLT" }, + { 0x03, 3, 3, "XY_SETUP_CLIP_BLT" }, + { 0x24, 2, 2, "XY_PIXEL_BLT" }, + { 0x25, 3, 3, "XY_SCANLINES_BLT" }, + { 0x26, 4, 4, "Y_TEXT_BLT" }, + { 0x31, 5, 134, "XY_TEXT_IMMEDIATE_BLT" }, + { 0x50, 6, 6, "XY_COLOR_BLT" }, + { 0x51, 6, 6, "XY_PAT_BLT" }, + { 0x76, 8, 8, "XY_PAT_CHROMA_BLT" }, + { 0x72, 7, 135, "XY_PAT_BLT_IMMEDIATE" }, + { 0x77, 9, 137, "XY_PAT_CHROMA_BLT_IMMEDIATE" }, + { 0x52, 9, 9, "XY_MONO_PAT_BLT" }, + { 0x59, 7, 7, "XY_MONO_PAT_FIXED_BLT" }, + { 0x53, 8, 8, "XY_SRC_COPY_BLT" }, + { 0x54, 8, 8, "XY_MONO_SRC_COPY_BLT" }, + { 0x71, 9, 137, "XY_MONO_SRC_COPY_IMMEDIATE_BLT" }, + { 0x55, 9, 9, "XY_FULL_BLT" }, + { 0x55, 9, 137, "XY_FULL_IMMEDIATE_PATTERN_BLT" }, + { 0x56, 9, 9, "XY_FULL_MONO_SRC_BLT" }, + { 0x75, 10, 138, "XY_FULL_MONO_SRC_IMMEDIATE_PATTERN_BLT" }, + { 0x57, 12, 12, "XY_FULL_MONO_PATTERN_BLT" }, + { 0x58, 12, 12, "XY_FULL_MONO_PATTERN_MONO_SRC_BLT" }, + }; + + unsigned int op, len; + char *format = NULL; + uint32_t *data = kgem->batch + offset; + struct drm_i915_gem_relocation_entry *reloc; + + /* Special case the two most common ops that we detail in full */ + switch ((data[0] & 0x1fc00000) >> 22) { + case 0x50: + kgem_debug_print(data, offset, 0, + "XY_COLOR_BLT (rgb %sabled, alpha %sabled, dst tile %d)\n", + (data[0] & (1 << 20)) ? "en" : "dis", + (data[0] & (1 << 21)) ? 
"en" : "dis", + (data[0] >> 11) & 1); + + len = (data[0] & 0x000000ff) + 2; + assert(len == 6); + + switch ((data[1] >> 24) & 0x3) { + case 0: + format="8"; + break; + case 1: + format="565"; + break; + case 2: + format="1555"; + break; + case 3: + format="8888"; + break; + } + + kgem_debug_print(data, offset, 1, "format %s, pitch %d, " + "clipping %sabled\n", format, + (short)(data[1] & 0xffff), + data[1] & (1 << 30) ? "en" : "dis"); + kgem_debug_print(data, offset, 2, "(%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + kgem_debug_print(data, offset, 3, "(%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + reloc = kgem_debug_get_reloc_entry(kgem, offset+4); + assert(reloc); + kgem_debug_print(data, offset, 4, "dst offset 0x%08x [handle=%d, delta=%d, read=%x, write=%x (fenced? %d)]\n", + data[4], + reloc->target_handle, reloc->delta, + reloc->read_domains, reloc->write_domain, + kgem_debug_handle_is_fenced(kgem, reloc->target_handle)); + kgem_debug_print(data, offset, 5, "color\n"); + return len; + + case 0x53: + kgem_debug_print(data, offset, 0, + "XY_SRC_COPY_BLT (rgb %sabled, alpha %sabled, " + "src tile %d, dst tile %d)\n", + (data[0] & (1 << 20)) ? "en" : "dis", + (data[0] & (1 << 21)) ? "en" : "dis", + (data[0] >> 15) & 1, + (data[0] >> 11) & 1); + + len = (data[0] & 0x000000ff) + 2; + assert(len == 8); + + switch ((data[1] >> 24) & 0x3) { + case 0: + format="8"; + break; + case 1: + format="565"; + break; + case 2: + format="1555"; + break; + case 3: + format="8888"; + break; + } + + kgem_debug_print(data, offset, 1, "format %s, dst pitch %d, " + "clipping %sabled\n", format, + (short)(data[1] & 0xffff), + data[1] & (1 << 30) ? 
"en" : "dis"); + kgem_debug_print(data, offset, 2, "dst (%d,%d)\n", + data[2] & 0xffff, data[2] >> 16); + kgem_debug_print(data, offset, 3, "dst (%d,%d)\n", + data[3] & 0xffff, data[3] >> 16); + reloc = kgem_debug_get_reloc_entry(kgem, offset+4); + assert(reloc); + kgem_debug_print(data, offset, 4, "dst offset 0x%08x [handle=%d, delta=%d, read=%x, write=%x, (fenced? %d)]\n", + data[4], + reloc->target_handle, reloc->delta, + reloc->read_domains, reloc->write_domain, + kgem_debug_handle_is_fenced(kgem, reloc->target_handle)); + kgem_debug_print(data, offset, 5, "src (%d,%d)\n", + data[5] & 0xffff, data[5] >> 16); + kgem_debug_print(data, offset, 6, "src pitch %d\n", + (short)(data[6] & 0xffff)); + reloc = kgem_debug_get_reloc_entry(kgem, offset+7); + assert(reloc); + kgem_debug_print(data, offset, 7, "src offset 0x%08x [handle=%d, delta=%d, read=%x, write=%x (fenced? %d)]\n", + data[7], + reloc->target_handle, reloc->delta, + reloc->read_domains, reloc->write_domain, + kgem_debug_handle_is_fenced(kgem, reloc->target_handle)); + return len; + } + + for (op = 0; op < ARRAY_SIZE(opcodes); op++) { + if ((data[0] & 0x1fc00000) >> 22 == opcodes[op].opcode) { + unsigned int i; + + len = 1; + kgem_debug_print(data, offset, 0, "%s\n", opcodes[op].name); + if (opcodes[op].max_len > 1) { + len = (data[0] & 0x000000ff) + 2; + assert(len >= opcodes[op].min_len && + len <= opcodes[op].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; + } + } + + kgem_debug_print(data, offset, 0, "2D UNKNOWN\n"); + assert(0); + return 1; +} + +static int (*decode_3d(int gen))(struct kgem*, uint32_t) +{ + if (gen >= 60) { + return kgem_gen6_decode_3d; + } else if (gen >= 50) { + return kgem_gen5_decode_3d; + } else if (gen >= 40) { + return kgem_gen4_decode_3d; + } else if (gen >= 30) { + return kgem_gen3_decode_3d; + } + assert(0); +} + +static void (*finish_state(int gen))(struct kgem*) +{ + if (gen >= 60) { + return 
kgem_gen6_finish_state; + } else if (gen >= 50) { + return kgem_gen5_finish_state; + } else if (gen >= 40) { + return kgem_gen4_finish_state; + } else if (gen >= 30) { + return kgem_gen3_finish_state; + } + assert(0); +} + +void __kgem_batch_debug(struct kgem *kgem, uint32_t nbatch) +{ + int (*const decode[])(struct kgem *, uint32_t) = { + decode_mi, + decode_nop, + decode_2d, + decode_3d(kgem->gen), + }; + uint32_t offset = 0; + + while (offset < nbatch) { + int class = (kgem->batch[offset] & 0xe0000000) >> 29; + assert(class < ARRAY_SIZE(decode)); + offset += decode[class](kgem, offset); + } + + finish_state(kgem->gen)(kgem); +} diff --git a/src/sna/kgem_debug.h b/src/sna/kgem_debug.h new file mode 100644 index 00000000..f9a931df --- /dev/null +++ b/src/sna/kgem_debug.h @@ -0,0 +1,28 @@ +#ifndef KGEM_DEBUG_H +#define KGEM_DEBUG_H + +void +kgem_debug_print(const uint32_t *data, + uint32_t offset, unsigned int index, + char *fmt, ...); + +struct drm_i915_gem_relocation_entry * +kgem_debug_get_reloc_entry(struct kgem *kgem, uint32_t offset); + +struct kgem_bo * +kgem_debug_get_bo_for_reloc_entry(struct kgem *kgem, + struct drm_i915_gem_relocation_entry *reloc); + +int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset); +void kgem_gen6_finish_state(struct kgem *kgem); + +int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset); +void kgem_gen5_finish_state(struct kgem *kgem); + +int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset); +void kgem_gen4_finish_state(struct kgem *kgem); + +int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset); +void kgem_gen3_finish_state(struct kgem *kgem); + +#endif diff --git a/src/sna/kgem_debug_gen3.c b/src/sna/kgem_debug_gen3.c new file mode 100644 index 00000000..da1d9fc9 --- /dev/null +++ b/src/sna/kgem_debug_gen3.c @@ -0,0 +1,1615 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated 
documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/mman.h> +#include <assert.h> + +#include "sna.h" +#include "sna_reg.h" + +#include "gen3_render.h" + +#include "kgem_debug.h" + +enum type { + T_FLOAT32, + T_FLOAT16, +}; + +static struct state { + struct vertex_buffer { + int handle; + void *base; + const char *ptr; + int pitch; + + struct kgem_bo *current; + } vb; + struct vertex_elements { + int offset; + bool valid; + enum type type; + int size; + uint8_t swizzle[4]; + } ve[33]; + int num_ve; +} state; + +static float int_as_float(int i) +{ + union { + float f; + int i; + } x; + x.i = i; + return x.f; +} + +static void gen3_update_vertex_buffer_addr(struct kgem *kgem, + uint32_t offset) +{ + uint32_t handle; + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i; + + offset *= sizeof(uint32_t); + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == offset) + break; + assert(i < kgem->nreloc); + handle = kgem->reloc[i].target_handle; + + if (handle == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map(kgem, bo, PROT_READ); + } + ptr = (char *)base + kgem->reloc[i].delta; + + if (state.vb.current) + munmap(state.vb.base, state.vb.current->size); + + state.vb.current = bo; + state.vb.base = base; + state.vb.ptr = ptr; +} + +static void gen3_update_vertex_buffer_pitch(struct kgem *kgem, + uint32_t offset) +{ + state.vb.pitch = kgem->batch[offset] >> 16 & 0x3f; + state.vb.pitch *= sizeof(uint32_t); +} + +static void gen3_update_vertex_elements(struct kgem *kgem, uint32_t data) +{ + state.ve[1].valid = 1; + + switch ((data >> 6) & 7) { + case 1: + state.ve[1].type = T_FLOAT32; + state.ve[1].size = 3; + state.ve[1].swizzle[0] = 1; + state.ve[1].swizzle[1] = 1; + 
state.ve[1].swizzle[2] = 1; + state.ve[1].swizzle[3] = 3; + break; + case 2: + state.ve[1].type = T_FLOAT32; + state.ve[1].size = 4; + state.ve[1].swizzle[0] = 1; + state.ve[1].swizzle[1] = 1; + state.ve[1].swizzle[2] = 1; + state.ve[1].swizzle[3] = 1; + break; + case 3: + state.ve[1].type = T_FLOAT32; + state.ve[1].size = 2; + state.ve[1].swizzle[0] = 1; + state.ve[1].swizzle[1] = 1; + state.ve[1].swizzle[2] = 2; + state.ve[1].swizzle[3] = 3; + break; + case 4: + state.ve[1].type = T_FLOAT32; + state.ve[1].size = 3; + state.ve[1].swizzle[0] = 1; + state.ve[1].swizzle[1] = 1; + state.ve[1].swizzle[2] = 3; + state.ve[1].swizzle[3] = 1; + break; + } + + state.ve[2].valid = 0; + state.ve[3].valid = 0; +} + +static void gen3_update_vertex_texcoords(struct kgem *kgem, uint32_t data) +{ + int id; + for (id = 0; id < 8; id++) { + uint32_t fmt = (data >> (id*4)) & 0xf; + int width; + + state.ve[id+4].valid = fmt != 0xf; + + width = 0; + switch (fmt) { + case 0: + state.ve[id+4].type = T_FLOAT32; + width = state.ve[id+4].size = 2; + break; + case 1: + state.ve[id+4].type = T_FLOAT32; + width = state.ve[id+4].size = 3; + break; + case 2: + state.ve[id+4].type = T_FLOAT32; + width = state.ve[id+4].size = 4; + break; + case 3: + state.ve[id+4].type = T_FLOAT32; + width = state.ve[id+4].size = 1; + break; + case 4: + state.ve[id+4].type = T_FLOAT16; + width = state.ve[id+4].size = 2; + break; + case 5: + state.ve[id+4].type = T_FLOAT16; + width = state.ve[id+4].size = 4; + break; + } + + state.ve[id+4].swizzle[0] = width > 0 ? 1 : 2; + state.ve[id+4].swizzle[1] = width > 1 ? 1 : 2; + state.ve[id+4].swizzle[2] = width > 2 ? 1 : 2; + state.ve[id+4].swizzle[3] = width > 3 ? 
1 : 2; + } +} + +static void gen3_update_vertex_elements_offsets(struct kgem *kgem) +{ + int i, offset; + + for (i = offset = 0; i < ARRAY_SIZE(state.ve); i++) { + int size; + + if (!state.ve[i].valid) + continue; + + size = 0; + switch (state.ve[i].type) { + case T_FLOAT16: + size = 4; + break; + case T_FLOAT32: + size = 4; + break; + } + state.ve[i].offset = offset; + offset += size * state.ve[i].size; + state.num_ve = i; + } +} + +static void vertices_float32_out(const struct vertex_elements *ve, const float *f, int max) +{ + int c; + + ErrorF("("); + for (c = 0; c < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%f", f[c]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < max-1) + ErrorF(", "); + } + ErrorF(")"); +} + +static void ve_out(const struct vertex_elements *ve, const void *ptr) +{ + switch (ve->type) { + case T_FLOAT32: + vertices_float32_out(ve, ptr, ve->size); + break; + case T_FLOAT16: + //vertices_float16_out(ve, ptr, ve->size); + break; + } +} + +static void indirect_vertex_out(struct kgem *kgem, uint32_t v) +{ + const struct vertex_buffer *vb = &state.vb; + int i = 1; + + do { + const struct vertex_elements *ve = &state.ve[i]; + const void *ptr = vb->ptr + v * vb->pitch + ve->offset; + + if (!ve->valid) + continue; + + ve_out(ve, ptr); + + while (++i <= state.num_ve && !state.ve[i].valid) + ; + + if (i <= state.num_ve) + ErrorF(", "); + } while (i <= state.num_ve); +} + +static int inline_vertex_out(struct kgem *kgem, void *base) +{ + const struct vertex_buffer *vb = &state.vb; + int i = 1; + + do { + const struct vertex_elements *ve = &state.ve[i]; + const void *ptr = (char *)base + ve->offset; + + if (!ve->valid) + continue; + + ve_out(ve, ptr); + + while (++i <= state.num_ve && !state.ve[i].valid) + ; + + if (i <= state.num_ve) + ErrorF(", "); + } while (i <= state.num_ve); + + return vb->pitch; 
+} + +static int +gen3_decode_3d_1c(struct kgem *kgem, uint32_t offset) +{ + uint32_t *data = kgem->batch + offset; + uint32_t opcode; + + opcode = (data[0] & 0x00f80000) >> 19; + + switch (opcode) { + case 0x11: + kgem_debug_print(data, offset, 0, "3DSTATE_DEPTH_SUBRECTANGLE_DISABLE\n"); + return 1; + case 0x10: + kgem_debug_print(data, offset, 0, "3DSTATE_SCISSOR_ENABLE %s\n", + data[0]&1?"enabled":"disabled"); + return 1; + case 0x01: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_COORD_SET_I830\n"); + return 1; + case 0x0a: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_CUBE_I830\n"); + return 1; + case 0x05: + kgem_debug_print(data, offset, 0, "3DSTATE_MAP_TEX_STREAM_I830\n"); + return 1; + } + + kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1c opcode = 0x%x\n", + opcode); + assert(0); + return 1; +} + +/** Sets the string dstname to describe the destination of the PS instruction */ +static void +gen3_get_instruction_dst(uint32_t *data, int i, char *dstname, int do_mask) +{ + uint32_t a0 = data[i]; + int dst_nr = (a0 >> 14) & 0xf; + char dstmask[8]; + char *sat; + + if (do_mask) { + if (((a0 >> 10) & 0xf) == 0xf) { + dstmask[0] = 0; + } else { + int dstmask_index = 0; + + dstmask[dstmask_index++] = '.'; + if (a0 & (1 << 10)) + dstmask[dstmask_index++] = 'x'; + if (a0 & (1 << 11)) + dstmask[dstmask_index++] = 'y'; + if (a0 & (1 << 12)) + dstmask[dstmask_index++] = 'z'; + if (a0 & (1 << 13)) + dstmask[dstmask_index++] = 'w'; + dstmask[dstmask_index++] = 0; + } + + if (a0 & (1 << 22)) + sat = ".sat"; + else + sat = ""; + } else { + dstmask[0] = 0; + sat = ""; + } + + switch ((a0 >> 19) & 0x7) { + case 0: + assert(dst_nr <= 15); + sprintf(dstname, "R%d%s%s", dst_nr, dstmask, sat); + break; + case 4: + assert(dst_nr == 0); + sprintf(dstname, "oC%s%s", dstmask, sat); + break; + case 5: + assert(dst_nr == 0); + sprintf(dstname, "oD%s%s", dstmask, sat); + break; + case 6: + assert(dst_nr <= 3); + sprintf(dstname, "U%d%s%s", dst_nr, dstmask, sat); + break; + 
default: + sprintf(dstname, "RESERVED"); + break; + } +} + +static char * +gen3_get_channel_swizzle(uint32_t select) +{ + switch (select & 0x7) { + case 0: + return (select & 8) ? "-x" : "x"; + case 1: + return (select & 8) ? "-y" : "y"; + case 2: + return (select & 8) ? "-z" : "z"; + case 3: + return (select & 8) ? "-w" : "w"; + case 4: + return (select & 8) ? "-0" : "0"; + case 5: + return (select & 8) ? "-1" : "1"; + default: + return (select & 8) ? "-bad" : "bad"; + } +} + +static void +gen3_get_instruction_src_name(uint32_t src_type, uint32_t src_nr, char *name) +{ + switch (src_type) { + case 0: + sprintf(name, "R%d", src_nr); + assert(src_nr <= 15); + break; + case 1: + if (src_nr < 8) + sprintf(name, "T%d", src_nr); + else if (src_nr == 8) + sprintf(name, "DIFFUSE"); + else if (src_nr == 9) + sprintf(name, "SPECULAR"); + else if (src_nr == 10) + sprintf(name, "FOG"); + else { + assert(0); + sprintf(name, "RESERVED"); + } + break; + case 2: + sprintf(name, "C%d", src_nr); + assert(src_nr <= 31); + break; + case 4: + sprintf(name, "oC"); + assert(src_nr == 0); + break; + case 5: + sprintf(name, "oD"); + assert(src_nr == 0); + break; + case 6: + sprintf(name, "U%d", src_nr); + assert(src_nr <= 3); + break; + default: + sprintf(name, "RESERVED"); + assert(0); + break; + } +} + +static void +gen3_get_instruction_src0(uint32_t *data, int i, char *srcname) +{ + uint32_t a0 = data[i]; + uint32_t a1 = data[i + 1]; + int src_nr = (a0 >> 2) & 0x1f; + char *swizzle_x = gen3_get_channel_swizzle((a1 >> 28) & 0xf); + char *swizzle_y = gen3_get_channel_swizzle((a1 >> 24) & 0xf); + char *swizzle_z = gen3_get_channel_swizzle((a1 >> 20) & 0xf); + char *swizzle_w = gen3_get_channel_swizzle((a1 >> 16) & 0xf); + char swizzle[100]; + + gen3_get_instruction_src_name((a0 >> 7) & 0x7, src_nr, srcname); + sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w); + if (strcmp(swizzle, ".xyzw") != 0) + strcat(srcname, swizzle); +} + +static void 
gen3_get_instruction_src1(uint32_t *data, int i, char *srcname)
{
	/* Decode source operand 1 (split across dwords 1 and 2) into srcname,
	 * appending the swizzle suffix unless it is the identity .xyzw. */
	uint32_t a1 = data[i + 1];
	uint32_t a2 = data[i + 2];
	int src_nr = (a1 >> 8) & 0x1f;
	char *swizzle_x = gen3_get_channel_swizzle((a1 >> 4) & 0xf);
	char *swizzle_y = gen3_get_channel_swizzle((a1 >> 0) & 0xf);
	char *swizzle_z = gen3_get_channel_swizzle((a2 >> 28) & 0xf);
	char *swizzle_w = gen3_get_channel_swizzle((a2 >> 24) & 0xf);
	char swizzle[100];

	gen3_get_instruction_src_name((a1 >> 13) & 0x7, src_nr, srcname);
	sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
	if (strcmp(swizzle, ".xyzw") != 0)
		strcat(srcname, swizzle);
}

/* Decode source operand 2 (entirely in dword 2) into srcname. */
static void
gen3_get_instruction_src2(uint32_t *data, int i, char *srcname)
{
	uint32_t a2 = data[i + 2];
	int src_nr = (a2 >> 16) & 0x1f;
	char *swizzle_x = gen3_get_channel_swizzle((a2 >> 12) & 0xf);
	char *swizzle_y = gen3_get_channel_swizzle((a2 >> 8) & 0xf);
	char *swizzle_z = gen3_get_channel_swizzle((a2 >> 4) & 0xf);
	char *swizzle_w = gen3_get_channel_swizzle((a2 >> 0) & 0xf);
	char swizzle[100];

	gen3_get_instruction_src_name((a2 >> 21) & 0x7, src_nr, srcname);
	sprintf(swizzle, ".%s%s%s%s", swizzle_x, swizzle_y, swizzle_z, swizzle_w);
	if (strcmp(swizzle, ".xyzw") != 0)
		strcat(srcname, swizzle);
}

/* Format the texture-address operand of a TEX instruction into name.
 * Like gen3_get_instruction_src_name but constants (C) are not valid here. */
static void
gen3_get_instruction_addr(uint32_t src_type, uint32_t src_nr, char *name)
{
	switch (src_type) {
	case 0:
		sprintf(name, "R%d", src_nr);
		assert(src_nr <= 15);
		break;
	case 1:
		if (src_nr < 8)
			sprintf(name, "T%d", src_nr);
		else if (src_nr == 8)
			sprintf(name, "DIFFUSE");
		else if (src_nr == 9)
			sprintf(name, "SPECULAR");
		else if (src_nr == 10)
			sprintf(name, "FOG");
		else {
			assert(0);
			sprintf(name, "RESERVED");
		}
		break;
	case 4:
		sprintf(name, "oC");
		assert(src_nr == 0);
		break;
	case 5:
		sprintf(name, "oD");
		assert(src_nr == 0);
		break;
	default:
		assert(0);
		sprintf(name, "RESERVED");
		break;
	}
}

/* Print a one-source ALU instruction; PS instructions are 3 dwords, so
 * two continuation lines are printed after the disassembly. */
static void
gen3_decode_alu1(uint32_t *data, uint32_t offset,
		 int i, char *instr_prefix, char *op_name)
{
	char dst[100], src0[100];

	gen3_get_instruction_dst(data, i, dst, 1);
	gen3_get_instruction_src0(data, i, src0);

	kgem_debug_print(data, offset, i++, "%s: %s %s, %s\n", instr_prefix,
		  op_name, dst, src0);
	kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
	kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
}

/* Print a two-source ALU instruction (3 dwords). */
static void
gen3_decode_alu2(uint32_t *data, uint32_t offset,
		 int i, char *instr_prefix, char *op_name)
{
	char dst[100], src0[100], src1[100];

	gen3_get_instruction_dst(data, i, dst, 1);
	gen3_get_instruction_src0(data, i, src0);
	gen3_get_instruction_src1(data, i, src1);

	kgem_debug_print(data, offset, i++, "%s: %s %s, %s, %s\n", instr_prefix,
		  op_name, dst, src0, src1);
	kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
	kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
}

/* Print a three-source ALU instruction (3 dwords). */
static void
gen3_decode_alu3(uint32_t *data, uint32_t offset,
		 int i, char *instr_prefix, char *op_name)
{
	char dst[100], src0[100], src1[100], src2[100];

	gen3_get_instruction_dst(data, i, dst, 1);
	gen3_get_instruction_src0(data, i, src0);
	gen3_get_instruction_src1(data, i, src1);
	gen3_get_instruction_src2(data, i, src2);

	kgem_debug_print(data, offset, i++, "%s: %s %s, %s, %s, %s\n", instr_prefix,
		  op_name, dst, src0, src1, src2);
	kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
	kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
}

/* Print a texture-load instruction: destination, sampler and coordinate. */
static void
gen3_decode_tex(uint32_t *data, uint32_t offset, int i, char *instr_prefix,
		char *tex_name)
{
	uint32_t t0 = data[i];
	uint32_t t1 = data[i + 1];
	char dst_name[100];
	char addr_name[100];
	int sampler_nr;

	gen3_get_instruction_dst(data, i, dst_name, 0);
	gen3_get_instruction_addr((t1 >> 24) & 0x7,
				  (t1 >> 17) & 0xf,
				  addr_name);
	sampler_nr = t0 & 0xf;

	kgem_debug_print(data, offset, i++, "%s: %s %s, S%d, %s\n", instr_prefix,
		  tex_name, dst_name, sampler_nr, addr_name);
	kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
	kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
}

/* Print a DCL (declaration) instruction: either a texture coordinate /
 * input declaration (kind 1) or a sampler declaration (kind 3).
 * The asserts reject channel masks the hardware does not allow. */
static void
gen3_decode_dcl(uint32_t *data, uint32_t offset, int i, char *instr_prefix)
{
	uint32_t d0 = data[i];
	char *sampletype;
	int dcl_nr = (d0 >> 14) & 0xf;
	char *dcl_x = d0 & (1 << 10) ? "x" : "";
	char *dcl_y = d0 & (1 << 11) ? "y" : "";
	char *dcl_z = d0 & (1 << 12) ? "z" : "";
	char *dcl_w = d0 & (1 << 13) ? "w" : "";
	char dcl_mask[10];

	switch ((d0 >> 19) & 0x3) {
	case 1:
		sprintf(dcl_mask, ".%s%s%s%s", dcl_x, dcl_y, dcl_z, dcl_w);
		assert (strcmp(dcl_mask, "."));

		assert(dcl_nr <= 10);
		if (dcl_nr < 8) {
			/* texture coordinate: only a few masks are legal */
			if (strcmp(dcl_mask, ".x") != 0 &&
			    strcmp(dcl_mask, ".xy") != 0 &&
			    strcmp(dcl_mask, ".xz") != 0 &&
			    strcmp(dcl_mask, ".w") != 0 &&
			    strcmp(dcl_mask, ".xyzw") != 0) {
				assert(0);
			}
			kgem_debug_print(data, offset, i++, "%s: DCL T%d%s\n", instr_prefix,
				  dcl_nr, dcl_mask);
		} else {
			if (strcmp(dcl_mask, ".xz") == 0)
				assert(0);
			else if (strcmp(dcl_mask, ".xw") == 0)
				assert(0);
			else if (strcmp(dcl_mask, ".xzw") == 0)
				assert(0);

			if (dcl_nr == 8) {
				kgem_debug_print(data, offset, i++, "%s: DCL DIFFUSE%s\n", instr_prefix,
					  dcl_mask);
			} else if (dcl_nr == 9) {
				kgem_debug_print(data, offset, i++, "%s: DCL SPECULAR%s\n", instr_prefix,
					  dcl_mask);
			} else if (dcl_nr == 10) {
				kgem_debug_print(data, offset, i++, "%s: DCL FOG%s\n", instr_prefix,
					  dcl_mask);
			}
		}
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		break;
	case 3:
		switch ((d0 >> 22) & 0x3) {
		case 0:
			sampletype = "2D";
			break;
		case 1:
			sampletype = "CUBE";
			break;
		case 2:
			sampletype = "3D";
			break;
		default:
			sampletype = "RESERVED";
			break;
		}
		assert(dcl_nr <= 15);
		kgem_debug_print(data, offset, i++, "%s: DCL S%d %s\n", instr_prefix,
			  dcl_nr, sampletype);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		break;
	default:
		kgem_debug_print(data, offset, i++, "%s: DCL RESERVED%d\n", instr_prefix, dcl_nr);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
	}
}

/* Disassemble one 3-dword pixel-shader instruction starting at data[i],
 * dispatching on the 5-bit opcode in bits 28:24 of the first dword. */
static void
gen3_decode_instruction(uint32_t *data, uint32_t offset,
			int i, char *instr_prefix)
{
	switch ((data[i] >> 24) & 0x1f) {
	case 0x0:
		kgem_debug_print(data, offset, i++, "%s: NOP\n", instr_prefix);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		break;
	case 0x01:
		gen3_decode_alu2(data, offset, i, instr_prefix, "ADD");
		break;
	case 0x02:
		gen3_decode_alu1(data, offset, i, instr_prefix, "MOV");
		break;
	case 0x03:
		gen3_decode_alu2(data, offset, i, instr_prefix, "MUL");
		break;
	case 0x04:
		gen3_decode_alu3(data, offset, i, instr_prefix, "MAD");
		break;
	case 0x05:
		gen3_decode_alu3(data, offset, i, instr_prefix, "DP2ADD");
		break;
	case 0x06:
		gen3_decode_alu2(data, offset, i, instr_prefix, "DP3");
		break;
	case 0x07:
		gen3_decode_alu2(data, offset, i, instr_prefix, "DP4");
		break;
	case 0x08:
		gen3_decode_alu1(data, offset, i, instr_prefix, "FRC");
		break;
	case 0x09:
		gen3_decode_alu1(data, offset, i, instr_prefix, "RCP");
		break;
	case 0x0a:
		gen3_decode_alu1(data, offset, i, instr_prefix, "RSQ");
		break;
	case 0x0b:
		gen3_decode_alu1(data, offset, i, instr_prefix, "EXP");
		break;
	case 0x0c:
		gen3_decode_alu1(data, offset, i, instr_prefix, "LOG");
		break;
	case 0x0d:
		gen3_decode_alu2(data, offset, i, instr_prefix, "CMP");
		break;
	case 0x0e:
		gen3_decode_alu2(data, offset, i, instr_prefix, "MIN");
		break;
	case 0x0f:
		gen3_decode_alu2(data, offset, i, instr_prefix, "MAX");
		break;
	case 0x10:
		gen3_decode_alu1(data, offset, i, instr_prefix, "FLR");
		break;
	case 0x11:
		gen3_decode_alu1(data, offset, i, instr_prefix, "MOD");
		break;
	case 0x12:
		gen3_decode_alu1(data, offset, i, instr_prefix, "TRC");
		break;
	case 0x13:
		gen3_decode_alu2(data, offset, i, instr_prefix, "SGE");
		break;
	case 0x14:
		gen3_decode_alu2(data, offset, i, instr_prefix, "SLT");
		break;
	case 0x15:
		gen3_decode_tex(data, offset, i, instr_prefix, "TEXLD");
		break;
	case 0x16:
		gen3_decode_tex(data, offset, i, instr_prefix, "TEXLDP");
		break;
	case 0x17:
		gen3_decode_tex(data, offset, i, instr_prefix, "TEXLDB");
		break;
	case 0x19:
		gen3_decode_dcl(data, offset, i, instr_prefix);
		break;
	default:
		kgem_debug_print(data, offset, i++, "%s: unknown\n", instr_prefix);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		kgem_debug_print(data, offset, i++, "%s\n", instr_prefix);
		break;
	}
}

/* Map a 3-bit comparison-function field to its name. */
static char *
gen3_decode_compare_func(uint32_t op)
{
	switch (op&0x7) {
	case 0: return "always";
	case 1: return "never";
	case 2: return "less";
	case 3: return "equal";
	case 4: return "lequal";
	case 5: return "greater";
	case 6: return "notequal";
	case 7: return "gequal";
	}
	return "";
}

/* Map a 3-bit stencil-op field to its name. */
static char *
gen3_decode_stencil_op(uint32_t op)
{
	switch (op&0x7) {
	case 0: return "keep";
	case 1: return "zero";
	case 2: return "replace";
	case 3: return "incr_sat";
	case 4: return "decr_sat";
	case 5: return "greater";
	case 6: return "incr";
	case 7: return "decr";
	}
	return "";
}

#if 0
/* part of MODES_4 */
static char *
gen3_decode_logic_op(uint32_t op)
{
	switch (op&0xf) {
	case 0: return "clear";
	case 1: return "nor";
	case 2: return "and_inv";
	case 3: return "copy_inv";
	case 4: return "and_rvrse";
	case 5: return "inv";
	case 6: return "xor";
	case 7: return "nand";
	case 8: return "and";
	case 9: return "equiv";
	case 10: return "noop";
	case 11: return "or_inv";
	case 12: return "copy";
	case 13: return "or_rvrse";
	case 14: return "or";
	case 15: return "set";
	}
	return "";
}
#endif

/* Map a 4-bit blend-factor field to its name (0 is reserved). */
static char *
gen3_decode_blend_fact(uint32_t op)
{
	switch (op&0xf) {
	case 1: return "zero";
	case 2: return "one";
	case 3: return "src_colr";
	case 4: return "inv_src_colr";
	case 5: return "src_alpha";
	case 6: return "inv_src_alpha";
	case 7: return "dst_alpha";
	case 8: return "inv_dst_alpha";
	case 9: return "dst_colr";
	case 10: return "inv_dst_colr";
	case 11: return "src_alpha_sat";
	case 12: return "cnst_colr";
	case 13: return "inv_cnst_colr";
	case 14: return "cnst_alpha";
	case 15: return "inv_const_alpha";
	}
	return "";
}

/* Map a 3-bit texture-coordinate addressing mode to its name (6,7 reserved). */
static char *
decode_tex_coord_mode(uint32_t mode)
{
	switch (mode&0x7) {
	case 0: return "wrap";
	case 1: return "mirror";
	case 2: return "clamp_edge";
	case 3: return "cube";
	case 4: return "clamp_border";
	case 5: return "mirror_once";
	}
	return "";
}

/* Map a 3-bit sampler filter field to its name (7 reserved). */
static char *
gen3_decode_sample_filter(uint32_t mode)
{
	switch (mode&0x7) {
	case 0: return "nearest";
	case 1: return "linear";
	case 2: return "anisotropic";
	case 3: return "4x4_1";
	case 4: return "4x4_2";
	case 5: return "4x4_flat";
	case 6: return "6x5_mono";
	}
	return "";
}

/* Decode 3DSTATE_LOAD_STATE_IMMEDIATE_1: one optional dword per S0..S8
 * word, selected by bits 4..12 of the header.  Also updates the tracked
 * vertex-buffer/element state used later by the 3DPRIMITIVE decoder.
 * Returns the command length in dwords. */
static int
gen3_decode_load_state_immediate_1(struct kgem *kgem, uint32_t offset)
{
	const uint32_t *data = kgem->batch + offset;
	int len, i, word;

	kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_1\n");
	len = (data[0] & 0x0000000f) + 2;
	i = 1;
	for (word = 0; word <= 8; word++) {
		if (data[0] & (1 << (4 + word))) {
			switch (word) {
			case 0:
				kgem_debug_print(data, offset, i, "S0: vbo offset: 0x%08x%s\n",
					  data[i]&(~1),data[i]&1?", auto cache invalidate disabled":"");
				gen3_update_vertex_buffer_addr(kgem, offset + i);
				break;
			case 1:
				kgem_debug_print(data, offset, i, "S1: vertex width: %i, vertex pitch: %i\n",
					  (data[i]>>24)&0x3f,(data[i]>>16)&0x3f);
				gen3_update_vertex_buffer_pitch(kgem, offset + i);
				break;
			case 2:
			{
				/* one 4-bit format nibble per texture coordinate set */
				char buf[200];
				int len = 0;
				int tex_num;
				for (tex_num = 0; tex_num < 8; tex_num++) {
					switch((data[i]>>tex_num*4)&0xf) {
					case 0: len += sprintf(buf + len, "%i=2D ", tex_num); break;
					case 1: len += sprintf(buf + len, "%i=3D ", tex_num); break;
					case 2: len += sprintf(buf + len, "%i=4D ", tex_num); break;
					case 3: len += sprintf(buf + len, "%i=1D ", tex_num); break;
					case 4: len += sprintf(buf + len, "%i=2D_16 ", tex_num); break;
					case 5: len += sprintf(buf + len, "%i=4D_16 ", tex_num); break;
					case 0xf: len += sprintf(buf + len, "%i=NP ", tex_num); break;
					}
				}
				kgem_debug_print(data, offset, i, "S2: texcoord formats: %s\n", buf);
				gen3_update_vertex_texcoords(kgem, data[i]);
			}

				break;
			case 3:
				/* NOTE(review): the trailing "word" argument has no
				 * matching conversion in the format string -- harmless
				 * to varargs, but looks like leftover debris. */
				kgem_debug_print(data, offset, i, "S3: not documented\n", word);
				break;
			case 4:
			{
				char *cullmode = "";
				char *vfmt_xyzw = "";
				switch((data[i]>>13)&0x3) {
				case 0: cullmode = "both"; break;
				case 1: cullmode = "none"; break;
				case 2: cullmode = "cw"; break;
				case 3: cullmode = "ccw"; break;
				}
				switch(data[i] & (7<<6 | 1<<2)) {
				case 1<<6: vfmt_xyzw = "XYZ,"; break;
				case 2<<6: vfmt_xyzw = "XYZW,"; break;
				case 3<<6: vfmt_xyzw = "XY,"; break;
				case 4<<6: vfmt_xyzw = "XYW,"; break;
				case 1<<6 | 1<<2: vfmt_xyzw = "XYZF,"; break;
				case 2<<6 | 1<<2: vfmt_xyzw = "XYZWF,"; break;
				case 3<<6 | 1<<2: vfmt_xyzw = "XYF,"; break;
				case 4<<6 | 1<<2: vfmt_xyzw = "XYWF,"; break;
				}
				kgem_debug_print(data, offset, i, "S4: point_width=%i, line_width=%.1f,"
					  "%s%s%s%s%s cullmode=%s, vfmt=%s%s%s%s%s%s "
					  "%s%s\n",
					  (data[i]>>23)&0x1ff,
					  ((data[i]>>19)&0xf) / 2.0,
					  data[i]&(0xf<<15)?" flatshade=":"",
					  data[i]&(1<<18)?"Alpha,":"",
					  data[i]&(1<<17)?"Fog,":"",
					  data[i]&(1<<16)?"Specular,":"",
					  data[i]&(1<<15)?"Color,":"",
					  cullmode,
					  data[i]&(1<<12)?"PointWidth,":"",
					  data[i]&(1<<11)?"SpecFog,":"",
					  data[i]&(1<<10)?"Color,":"",
					  data[i]&(1<<9)?"DepthOfs,":"",
					  vfmt_xyzw,
					  data[i]&(1<<9)?"FogParam,":"",
					  data[i]&(1<<5)?"force default diffuse, ":"",
					  data[i]&(1<<4)?"force default specular, ":"",
					  data[i]&(1<<3)?"local depth ofs enable, ":"",
					  data[i]&(1<<1)?"point sprite enable, ":"",
					  data[i]&(1<<0)?"line AA enable, ":"");
				gen3_update_vertex_elements(kgem, data[i]);
				break;
			}
			case 5:
			{
				kgem_debug_print(data, offset, i, "S5:%s%s%s%s%s"
					  "%s%s%s%s stencil_ref=0x%x, stencil_test=%s, "
					  "stencil_fail=%s, stencil_pass_z_fail=%s, "
					  "stencil_pass_z_pass=%s, %s%s%s%s\n",
					  data[i]&(0xf<<28)?" write_disable=":"",
					  data[i]&(1<<31)?"Alpha,":"",
					  data[i]&(1<<30)?"Red,":"",
					  data[i]&(1<<29)?"Green,":"",
					  data[i]&(1<<28)?"Blue,":"",
					  data[i]&(1<<27)?" force default point size,":"",
					  data[i]&(1<<26)?" last pixel enable,":"",
					  data[i]&(1<<25)?" global depth ofs enable,":"",
					  data[i]&(1<<24)?" fog enable,":"",
					  (data[i]>>16)&0xff,
					  gen3_decode_compare_func(data[i]>>13),
					  gen3_decode_stencil_op(data[i]>>10),
					  gen3_decode_stencil_op(data[i]>>7),
					  gen3_decode_stencil_op(data[i]>>4),
					  data[i]&(1<<3)?"stencil write enable, ":"",
					  data[i]&(1<<2)?"stencil test enable, ":"",
					  data[i]&(1<<1)?"color dither enable, ":"",
					  data[i]&(1<<0)?"logicop enable, ":"");
			}
			break;
			case 6:
				kgem_debug_print(data, offset, i, "S6: %salpha_test=%s, alpha_ref=0x%x, "
					  "depth_test=%s, %ssrc_blnd_fct=%s, dst_blnd_fct=%s, "
					  "%s%stristrip_provoking_vertex=%i\n",
					  data[i]&(1<<31)?"alpha test enable, ":"",
					  gen3_decode_compare_func(data[i]>>28),
					  data[i]&(0xff<<20),
					  gen3_decode_compare_func(data[i]>>16),
					  data[i]&(1<<15)?"cbuf blend enable, ":"",
					  gen3_decode_blend_fact(data[i]>>8),
					  gen3_decode_blend_fact(data[i]>>4),
					  data[i]&(1<<3)?"depth write enable, ":"",
					  data[i]&(1<<2)?"cbuf write enable, ":"",
					  data[i]&(0x3));
				break;
			case 7:
				kgem_debug_print(data, offset, i, "S7: depth offset constant: 0x%08x\n", data[i]);
				break;
			}
			i++;
		}
	}

	assert(len == i);
	return len;
}

/* Decode the 3D opcode-0x1d (multi-dword state) commands, dispatching on
 * the 8-bit sub-opcode in bits 23:16.  Unknown but table-listed opcodes
 * get a generic dword dump.  Returns the command length in dwords. */
static int
gen3_decode_3d_1d(struct kgem *kgem, uint32_t offset)
{
	uint32_t *data = kgem->batch + offset;
	unsigned int len, i, c, idx, word, map, sampler, instr;
	char *format, *zformat, *type;
	uint32_t opcode;
	const struct {
		uint32_t opcode;
		int min_len;
		int max_len;
		char *name;
	} opcodes_3d_1d[] = {
		{ 0x86, 4, 4, "3DSTATE_CHROMA_KEY" },
		{ 0x88, 2, 2, "3DSTATE_CONSTANT_BLEND_COLOR" },
		{ 0x99, 2, 2, "3DSTATE_DEFAULT_DIFFUSE" },
		{ 0x9a, 2, 2, "3DSTATE_DEFAULT_SPECULAR" },
		{ 0x98, 2, 2, "3DSTATE_DEFAULT_Z" },
		{ 0x97, 2, 2, "3DSTATE_DEPTH_OFFSET_SCALE" },
		{ 0x9d, 65, 65, "3DSTATE_FILTER_COEFFICIENTS_4X4" },
		{ 0x9e, 4, 4, "3DSTATE_MONO_FILTER" },
		{ 0x89, 4, 4, "3DSTATE_FOG_MODE" },
		{ 0x8f, 2, 16, "3DSTATE_MAP_PALLETE_LOAD_32" },
		{ 0x83, 2, 2, "3DSTATE_SPAN_STIPPLE" },
	}, *opcode_3d_1d;

	opcode = (data[0] & 0x00ff0000) >> 16;

	switch (opcode) {
	case 0x07:
		/* This instruction is unusual.  A 0 length means just 1 DWORD instead of
		 * 2.  The 0 length is specified in one place to be unsupported, but
		 * stated to be required in another, and 0 length LOAD_INDIRECTs appear
		 * to cause no harm at least.
		 */
		kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_INDIRECT\n");
		len = (data[0] & 0x000000ff) + 1;
		i = 1;
		if (data[0] & (0x01 << 8)) {
			kgem_debug_print(data, offset, i++, "SIS.0\n");
			kgem_debug_print(data, offset, i++, "SIS.1\n");
		}
		if (data[0] & (0x02 << 8)) {
			kgem_debug_print(data, offset, i++, "DIS.0\n");
		}
		if (data[0] & (0x04 << 8)) {
			kgem_debug_print(data, offset, i++, "SSB.0\n");
			kgem_debug_print(data, offset, i++, "SSB.1\n");
		}
		if (data[0] & (0x08 << 8)) {
			kgem_debug_print(data, offset, i++, "MSB.0\n");
			kgem_debug_print(data, offset, i++, "MSB.1\n");
		}
		if (data[0] & (0x10 << 8)) {
			kgem_debug_print(data, offset, i++, "PSP.0\n");
			kgem_debug_print(data, offset, i++, "PSP.1\n");
		}
		if (data[0] & (0x20 << 8)) {
			kgem_debug_print(data, offset, i++, "PSC.0\n");
			kgem_debug_print(data, offset, i++, "PSC.1\n");
		}
		assert(len == i);
		return len;
	case 0x04:
		return gen3_decode_load_state_immediate_1(kgem, offset);
	case 0x03:
		kgem_debug_print(data, offset, 0, "3DSTATE_LOAD_STATE_IMMEDIATE_2\n");
		len = (data[0] & 0x0000000f) + 2;
		i = 1;
		for (word = 6; word <= 14; word++) {
			if (data[0] & (1 << word)) {
				if (word == 6)
					kgem_debug_print(data, offset, i++, "TBCF\n");
				else if (word >= 7 && word <= 10) {
					kgem_debug_print(data, offset, i++, "TB%dC\n", word - 7);
					kgem_debug_print(data, offset, i++, "TB%dA\n", word - 7);
				} else if (word >= 11 && word <= 14) {
					kgem_debug_print(data, offset, i, "TM%dS0: offset=0x%08x, %s\n",
						  word - 11,
						  data[i]&0xfffffffe,
						  data[i]&1?"use fence":"");
					i++;
					kgem_debug_print(data, offset, i, "TM%dS1: height=%i, width=%i, %s\n",
						  word - 11,
						  data[i]>>21, (data[i]>>10)&0x3ff,
						  data[i]&2?(data[i]&1?"y-tiled":"x-tiled"):"");
					i++;
					kgem_debug_print(data, offset, i, "TM%dS2: pitch=%i, \n",
						  word - 11,
						  ((data[i]>>21) + 1)*4);
					i++;
					kgem_debug_print(data, offset, i++, "TM%dS3\n", word - 11);
					kgem_debug_print(data, offset, i++, "TM%dS4: dflt color\n", word - 11);
				}
			}
		}
		assert(len == i);
		return len;
	case 0x00:
		kgem_debug_print(data, offset, 0, "3DSTATE_MAP_STATE\n");
		len = (data[0] & 0x0000003f) + 2;
		kgem_debug_print(data, offset, 1, "mask\n");

		i = 2;
		for (map = 0; map <= 15; map++) {
			if (data[1] & (1 << map)) {
				int width, height, pitch, dword;
				const char *tiling;

				dword = data[i];
				kgem_debug_print(data, offset, i++, "map %d MS2 %s%s%s\n", map,
					  dword&(1<<31)?"untrusted surface, ":"",
					  dword&(1<<1)?"vertical line stride enable, ":"",
					  dword&(1<<0)?"vertical ofs enable, ":"");

				dword = data[i];
				width = ((dword >> 10) & ((1 << 11) - 1))+1;
				height = ((dword >> 21) & ((1 << 11) - 1))+1;

				tiling = "none";
				if (dword & (1 << 2))
					tiling = "fenced";
				else if (dword & (1 << 1))
					tiling = dword & (1 << 0) ? "Y" : "X";
				type = " BAD";
				format = " (invalid)";
				/* surface format: bits 9:7 select the type class,
				 * bits 6:3 the format within the class */
				switch ((dword>>7) & 0x7) {
				case 1:
					type = "8";
					switch ((dword>>3) & 0xf) {
					case 0: format = "I"; break;
					case 1: format = "L"; break;
					case 4: format = "A"; break;
					case 5: format = " mono"; break;
					}
					break;
				case 2:
					type = "16";
					switch ((dword>>3) & 0xf) {
					case 0: format = " rgb565"; break;
					case 1: format = " argb1555"; break;
					case 2: format = " argb4444"; break;
					case 3: format = " ay88"; break;
					case 5: format = " 88dvdu"; break;
					case 6: format = " bump655"; break;
					case 7: format = "I"; break;
					case 8: format = "L"; break;
					case 9: format = "A"; break;
					}
					break;
				case 3:
					type = "32";
					switch ((dword>>3) & 0xf) {
					case 0: format = " argb8888"; break;
					case 1: format = " abgr8888"; break;
					case 2: format = " xrgb8888"; break;
					case 3: format = " xbgr8888"; break;
					case 4: format = " qwvu8888"; break;
					case 5: format = " axvu8888"; break;
					case 6: format = " lxvu8888"; break;
					case 7: format = " xlvu8888"; break;
					case 8: format = " argb2101010"; break;
					case 9: format = " abgr2101010"; break;
					case 10: format = " awvu2101010"; break;
					case 11: format = " gr1616"; break;
					case 12: format = " vu1616"; break;
					case 13: format = " xI824"; break;
					case 14: format = " xA824"; break;
					case 15: format = " xL824"; break;
					}
					break;
				case 5:
					type = "422";
					switch ((dword>>3) & 0xf) {
					case 0: format = " yuv_swapy"; break;
					case 1: format = " yuv"; break;
					case 2: format = " yuv_swapuv"; break;
					case 3: format = " yuv_swapuvy"; break;
					}
					break;
				case 6:
					type = "compressed";
					switch ((dword>>3) & 0x7) {
					case 0: format = " dxt1"; break;
					case 1: format = " dxt2_3"; break;
					case 2: format = " dxt4_5"; break;
					case 3: format = " fxt1"; break;
					case 4: format = " dxt1_rb"; break;
					}
					break;
				case 7:
					type = "4b indexed";
					switch ((dword>>3) & 0xf) {
					case 7: format = " argb8888"; break;
					}
					break;
				default:
					format = "BAD";
					break;
				}
				dword = data[i];
				kgem_debug_print(data, offset, i++, "map %d MS3 [width=%d, height=%d, format=%s%s, tiling=%s%s]\n",
					  map, width, height, type, format, tiling,
					  dword&(1<<9)?" palette select":"");

				dword = data[i];
				pitch = 4*(((dword >> 21) & ((1 << 11) - 1))+1);
				kgem_debug_print(data, offset, i++, "map %d MS4 [pitch=%d, max_lod=%i, vol_depth=%i, cube_face_ena=%x, %s]\n",
					  map, pitch,
					  (dword>>9)&0x3f, dword&0xff, (dword>>15)&0x3f,
					  dword&(1<<8)?"miplayout legacy":"miplayout right");
			}
		}
		assert(len == i);
		return len;
	case 0x06:
		kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_CONSTANTS\n");
		len = (data[0] & 0x000000ff) + 2;

		i = 2;
		for (c = 0; c <= 31; c++) {
			if (data[1] & (1 << c)) {
				kgem_debug_print(data, offset, i, "C%d.X = %f\n",
					  c, int_as_float(data[i]));
				i++;
				kgem_debug_print(data, offset, i, "C%d.Y = %f\n",
					  c, int_as_float(data[i]));
				i++;
				kgem_debug_print(data, offset, i, "C%d.Z = %f\n",
					  c, int_as_float(data[i]));
				i++;
				kgem_debug_print(data, offset, i, "C%d.W = %f\n",
					  c, int_as_float(data[i]));
				i++;
			}
		}
		assert(len == i);
		return len;
	case 0x05:
		kgem_debug_print(data, offset, 0, "3DSTATE_PIXEL_SHADER_PROGRAM\n");
		len = (data[0] & 0x000000ff) + 2;
		/* program is a whole number of 3-dword instructions,
		 * at most 123 instructions (len <= 370) */
		assert(((len-1) % 3) == 0);
		assert(len <= 370);
		i = 1;
		for (instr = 0; instr < (len - 1) / 3; instr++) {
			char instr_prefix[10];

			sprintf(instr_prefix, "PS%03d", instr);
			gen3_decode_instruction(data, offset, i, instr_prefix);
			i += 3;
		}
		return len;
	case 0x01:
		kgem_debug_print(data, offset, 0, "3DSTATE_SAMPLER_STATE\n");
		kgem_debug_print(data, offset, 1, "mask\n");
		len = (data[0] & 0x0000003f) + 2;
		i = 2;
		for (sampler = 0; sampler <= 15; sampler++) {
			if (data[1] & (1 << sampler)) {
				uint32_t dword;
				char *mip_filter = "";
				dword = data[i];
				switch ((dword>>20)&0x3) {
				case 0: mip_filter = "none"; break;
				case 1: mip_filter = "nearest"; break;
				case 3: mip_filter = "linear"; break;
				}
				kgem_debug_print(data, offset, i++, "sampler %d SS2:%s%s%s "
					  "base_mip_level=%i, mip_filter=%s, mag_filter=%s, min_filter=%s "
					  "lod_bias=%.2f,%s max_aniso=%i, shadow_func=%s\n", sampler,
					  dword&(1<<31)?" reverse gamma,":"",
					  dword&(1<<30)?" packed2planar,":"",
					  dword&(1<<29)?" colorspace conversion,":"",
					  (dword>>22)&0x1f,
					  mip_filter,
					  gen3_decode_sample_filter(dword>>17),
					  gen3_decode_sample_filter(dword>>14),
					  ((dword>>5)&0x1ff)/(0x10*1.0),
					  dword&(1<<4)?" shadow,":"",
					  dword&(1<<3)?4:2,
					  gen3_decode_compare_func(dword));
				dword = data[i];
				kgem_debug_print(data, offset, i++, "sampler %d SS3: min_lod=%.2f,%s "
					  "tcmode_x=%s, tcmode_y=%s, tcmode_z=%s,%s texmap_idx=%i,%s\n",
					  sampler, ((dword>>24)&0xff)/(0x10*1.0),
					  dword&(1<<17)?" kill pixel enable,":"",
					  decode_tex_coord_mode(dword>>12),
					  decode_tex_coord_mode(dword>>9),
					  decode_tex_coord_mode(dword>>6),
					  dword&(1<<5)?" normalized coords,":"",
					  (dword>>1)&0xf,
					  dword&(1<<0)?" deinterlacer,":"");
				dword = data[i];
				/* NOTE(review): the two arguments after "sampler" have no
				 * matching conversions in this format string -- harmless
				 * to varargs, but probably leftover debris. */
				kgem_debug_print(data, offset, i++, "sampler %d SS4: border color\n",
					  sampler, ((dword>>24)&0xff)/(0x10*1.0),
					  dword);
			}
		}
		assert(len == i);
		return len;
	case 0x85:
		len = (data[0] & 0x0000000f) + 2;
		assert(len == 2);

		kgem_debug_print(data, offset, 0,
			  "3DSTATE_DEST_BUFFER_VARIABLES\n");

		switch ((data[1] >> 8) & 0xf) {
		case 0x0: format = "g8"; break;
		case 0x1: format = "x1r5g5b5"; break;
		case 0x2: format = "r5g6b5"; break;
		case 0x3: format = "a8r8g8b8"; break;
		case 0x4: format = "ycrcb_swapy"; break;
		case 0x5: format = "ycrcb_normal"; break;
		case 0x6: format = "ycrcb_swapuv"; break;
		case 0x7: format = "ycrcb_swapuvy"; break;
		case 0x8: format = "a4r4g4b4"; break;
		case 0x9: format = "a1r5g5b5"; break;
		case 0xa: format = "a2r10g10b10"; break;
		default: format = "BAD"; break;
		}
		switch ((data[1] >> 2) & 0x3) {
		case 0x0: zformat = "u16"; break;
		case 0x1: zformat = "f16"; break;
		case 0x2: zformat = "u24x8"; break;
		default: zformat = "BAD"; break;
		}
		kgem_debug_print(data, offset, 1, "%s format, %s depth format, early Z %sabled\n",
			  format, zformat,
			  (data[1] & (1 << 31)) ? "en" : "dis");
		return len;

	case 0x8e:
		{
			const char *name, *tiling;

			len = (data[0] & 0x0000000f) + 2;
			assert(len == 3);

			switch((data[1] >> 24) & 0x7) {
			case 0x3: name = "color"; break;
			case 0x7: name = "depth"; break;
			default: name = "unknown"; break;
			}

			tiling = "none";
			if (data[1] & (1 << 23))
				tiling = "fenced";
			else if (data[1] & (1 << 22))
				tiling = data[1] & (1 << 21) ? "Y" : "X";

			kgem_debug_print(data, offset, 0, "3DSTATE_BUFFER_INFO\n");
			kgem_debug_print(data, offset, 1, "%s, tiling = %s, pitch=%d\n", name, tiling, data[1]&0xffff);

			kgem_debug_print(data, offset, 2, "address\n");
			return len;
		}
	case 0x81:
		len = (data[0] & 0x0000000f) + 2;
		assert(len == 3);

		kgem_debug_print(data, offset, 0,
			  "3DSTATE_SCISSOR_RECTANGLE\n");
		kgem_debug_print(data, offset, 1, "(%d,%d)\n",
			  data[1] & 0xffff, data[1] >> 16);
		kgem_debug_print(data, offset, 2, "(%d,%d)\n",
			  data[2] & 0xffff, data[2] >> 16);

		return len;
	case 0x80:
		len = (data[0] & 0x0000000f) + 2;
		assert(len == 5);

		kgem_debug_print(data, offset, 0,
			  "3DSTATE_DRAWING_RECTANGLE\n");
		kgem_debug_print(data, offset, 1, "%s\n",
			  data[1]&(1<<30)?"depth ofs disabled ":"");
		kgem_debug_print(data, offset, 2, "(%d,%d)\n",
			  data[2] & 0xffff, data[2] >> 16);
		kgem_debug_print(data, offset, 3, "(%d,%d)\n",
			  data[3] & 0xffff, data[3] >> 16);
		kgem_debug_print(data, offset, 4, "(%d,%d)\n",
			  (int16_t)(data[4] & 0xffff),
			  (int16_t)(data[4] >> 16));

		return len;
	case 0x9c:
		len = (data[0] & 0x0000000f) + 2;
		assert(len == 7);

		kgem_debug_print(data, offset, 0,
			  "3DSTATE_CLEAR_PARAMETERS\n");
		kgem_debug_print(data, offset, 1, "prim_type=%s, clear=%s%s%s\n",
			  data[1]&(1<<16)?"CLEAR_RECT":"ZONE_INIT",
			  data[1]&(1<<2)?"color,":"",
			  data[1]&(1<<1)?"depth,":"",
			  data[1]&(1<<0)?"stencil,":"");
		kgem_debug_print(data, offset, 2, "clear color\n");
		kgem_debug_print(data, offset, 3, "clear depth/stencil\n");
		kgem_debug_print(data, offset, 4, "color value (rgba8888)\n");
		kgem_debug_print(data, offset, 5, "depth value %f\n",
			  int_as_float(data[5]));
		kgem_debug_print(data, offset, 6, "clear stencil\n");
		return len;
	}

	/* fall back to the table of fixed-format state packets */
	for (idx = 0; idx < ARRAY_SIZE(opcodes_3d_1d); idx++) {
		opcode_3d_1d = &opcodes_3d_1d[idx];
		if (((data[0] & 0x00ff0000) >> 16) == opcode_3d_1d->opcode) {
			len = (data[0] & 0xf) + 2;
			kgem_debug_print(data, offset, 0, "%s\n", opcode_3d_1d->name);
			for (i = 1; i < len; i++)
				kgem_debug_print(data, offset, i, "dword %d\n", i);

			return len;
		}
	}

	kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d_1d opcode = 0x%x\n", opcode);
	assert(0);
	return 1;
}

#define VERTEX_OUT(fmt, ...) do {					\
	kgem_debug_print(data, offset, i, "	V%d."fmt"\n", vertex, __VA_ARGS__); \
	i++;								\
} while (0)

/* Decode a 3DPRIMITIVE command, printing either the inline vertex data or
 * the sequentially/randomly indexed vertices from the tracked vertex
 * buffer.  Returns the number of dwords consumed. */
static int
gen3_decode_3d_primitive(struct kgem *kgem, uint32_t offset)
{
	uint32_t *data = kgem->batch + offset;
	char immediate = (data[0] & (1 << 23)) == 0;
	unsigned int len, i, ret;
	char *primtype;
	unsigned int vertex = 0;

	switch ((data[0] >> 18) & 0xf) {
	case 0x0: primtype = "TRILIST"; break;
	case 0x1: primtype = "TRISTRIP"; break;
	case 0x2: primtype = "TRISTRIP_REVERSE"; break;
	case 0x3: primtype = "TRIFAN"; break;
	case 0x4: primtype = "POLYGON"; break;
	case 0x5: primtype = "LINELIST"; break;
	case 0x6: primtype = "LINESTRIP"; break;
	case 0x7: primtype = "RECTLIST"; break;
	case 0x8: primtype = "POINTLIST"; break;
	case 0x9: primtype = "DIB"; break;
	case 0xa: primtype = "CLEAR_RECT"; assert(0); break;
	default: primtype = "unknown"; break;
	}

	gen3_update_vertex_elements_offsets(kgem);

	/* XXX: 3DPRIM_DIB not supported */
	if (immediate) {
		len = (data[0] & 0x0003ffff) + 2;
		kgem_debug_print(data, offset, 0, "3DPRIMITIVE inline %s\n", primtype);
		for
(i = 1; i < len; ) { + ErrorF(" [%d]: ", vertex); + i += inline_vertex_out(kgem, data + i) / sizeof(uint32_t); + ErrorF("\n"); + vertex++; + } + + ret = len; + } else { + /* indirect vertices */ + len = data[0] & 0x0000ffff; /* index count */ + if (data[0] & (1 << 17)) { + /* random vertex access */ + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE random indirect %s (%d)\n", primtype, len); + assert(0); + if (len == 0) { + /* vertex indices continue until 0xffff is found */ + } else { + /* fixed size vertex index buffer */ + } + ret = (len + 1) / 2 + 1; + goto out; + } else { + /* sequential vertex access */ + vertex = data[1] & 0xffff; + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE sequential indirect %s, %d starting from " + "%d\n", primtype, len, vertex); + kgem_debug_print(data, offset, 1, " start\n"); + for (i = 0; i < len; i++) { + ErrorF(" [%d]: ", vertex); + indirect_vertex_out(kgem, vertex++); + ErrorF("\n"); + } + ret = 2; + goto out; + } + } + +out: + return ret; +} + +int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset) +{ + uint32_t opcode; + unsigned int idx; + + struct { + uint32_t opcode; + int min_len; + int max_len; + char *name; + } opcodes[] = { + { 0x06, 1, 1, "3DSTATE_ANTI_ALIASING" }, + { 0x08, 1, 1, "3DSTATE_BACKFACE_STENCIL_OPS" }, + { 0x09, 1, 1, "3DSTATE_BACKFACE_STENCIL_MASKS" }, + { 0x16, 1, 1, "3DSTATE_COORD_SET_BINDINGS" }, + { 0x15, 1, 1, "3DSTATE_FOG_COLOR" }, + { 0x0b, 1, 1, "3DSTATE_INDEPENDENT_ALPHA_BLEND" }, + { 0x0d, 1, 1, "3DSTATE_MODES_4" }, + { 0x0c, 1, 1, "3DSTATE_MODES_5" }, + { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, + }; + uint32_t *data = kgem->batch + offset; + + opcode = (data[0] & 0x1f000000) >> 24; + + switch (opcode) { + case 0x1f: + return gen3_decode_3d_primitive(kgem, offset); + case 0x1d: + return gen3_decode_3d_1d(kgem, offset); + case 0x1c: + return gen3_decode_3d_1c(kgem, offset); + } + + for (idx = 0; idx < ARRAY_SIZE(opcodes); idx++) { + if (opcode == opcodes[idx].opcode) { + unsigned int 
len = 1, i; + + kgem_debug_print(data, offset, 0, "%s\n", opcodes[idx].name); + if (opcodes[idx].max_len > 1) { + len = (data[0] & 0xff) + 2; + assert(len >= opcodes[idx].min_len || + len <= opcodes[idx].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + return len; + } + } + + kgem_debug_print(data, offset, 0, "3D UNKNOWN: 3d opcode = 0x%x\n", opcode); + return 1; +} + + +void kgem_gen3_finish_state(struct kgem *kgem) +{ + if (state.vb.current) + munmap(state.vb.base, state.vb.current->size); + + memset(&state, 0, sizeof(state)); +} diff --git a/src/sna/kgem_debug_gen4.c b/src/sna/kgem_debug_gen4.c new file mode 100644 index 00000000..d736cbd9 --- /dev/null +++ b/src/sna/kgem_debug_gen4.c @@ -0,0 +1,711 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/mman.h> +#include <assert.h> + +#include "sna.h" +#include "sna_reg.h" + +#include "gen4_render.h" + +#include "kgem_debug.h" + +static struct state { + struct vertex_buffer { + int handle; + void *base; + const char *ptr; + int pitch; + + struct kgem_bo *current; + } vb[33]; + struct vertex_elements { + int buffer; + int offset; + bool valid; + uint32_t type; + uint8_t swizzle[4]; + } ve[33]; + int num_ve; + + struct dynamic_state { + struct kgem_bo *current; + void *base, *ptr; + } dynamic_state; +} state; + +static void gen4_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) +{ + uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch); + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == reloc) + break; + assert(i < kgem->nreloc); + reloc = kgem->reloc[i].target_handle; + + if (reloc == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == reloc) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map(kgem, bo, PROT_READ); + } + ptr = (char *)base + kgem->reloc[i].delta; + + i = data[0] >> 27; + if (state.vb[i].current) + munmap(state.vb[i].base, state.vb[i].current->size); + + state.vb[i].current = bo; + state.vb[i].base = base; + state.vb[i].ptr = ptr; + state.vb[i].pitch = data[0] & 0x7ff; +} + +static uint32_t +get_ve_component(uint32_t data, int component) +{ + return (data >> (16 + (3 - component) * 4)) & 0x7; +} + +static void gen4_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data) +{ + state.ve[id].buffer = data[0] >> 27; + state.ve[id].valid = !!(data[0] & (1 << 26)); + state.ve[id].type = (data[0] >> 16) & 0x1ff; + state.ve[id].offset = data[0] & 0x7ff; + 
state.ve[id].swizzle[0] = get_ve_component(data[1], 0); + state.ve[id].swizzle[1] = get_ve_component(data[1], 1); + state.ve[id].swizzle[2] = get_ve_component(data[1], 2); + state.ve[id].swizzle[3] = get_ve_component(data[1], 3); +} + +static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max) +{ + int c; + + ErrorF("("); + for (c = 0; c < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%d", v[c]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max) +{ + int c, o; + + ErrorF("("); + for (c = o = 0; c < 4 && o < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%f", f[o++]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void ve_out(const struct vertex_elements *ve, const void *ptr) +{ + switch (ve->type) { + case GEN4_SURFACEFORMAT_R32_FLOAT: + vertices_float_out(ve, ptr, 1); + break; + case GEN4_SURFACEFORMAT_R32G32_FLOAT: + vertices_float_out(ve, ptr, 2); + break; + case 
GEN4_SURFACEFORMAT_R32G32B32_FLOAT: + vertices_float_out(ve, ptr, 3); + break; + case GEN4_SURFACEFORMAT_R32G32B32A32_FLOAT: + vertices_float_out(ve, ptr, 4); + break; + case GEN4_SURFACEFORMAT_R16_SINT: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN4_SURFACEFORMAT_R16G16_SINT: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN4_SURFACEFORMAT_R16G16B16A16_SINT: + vertices_sint16_out(ve, ptr, 4); + break; + case GEN4_SURFACEFORMAT_R16_SSCALED: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN4_SURFACEFORMAT_R16G16_SSCALED: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN4_SURFACEFORMAT_R16G16B16A16_SSCALED: + vertices_sint16_out(ve, ptr, 4); + break; + } +} + +static void indirect_vertex_out(struct kgem *kgem, uint32_t v) +{ + int i = 0; + + do { + const struct vertex_elements *ve = &state.ve[i]; + const struct vertex_buffer *vb = &state.vb[ve->buffer]; + const void *ptr = vb->ptr + v * vb->pitch + ve->offset; + + if (!ve->valid) + continue; + + ve_out(ve, ptr); + + while (++i <= state.num_ve && !state.ve[i].valid) + ; + + if (i <= state.num_ve) + ErrorF(", "); + } while (i <= state.num_ve); +} + +static void primitive_out(struct kgem *kgem, uint32_t *data) +{ + int n; + + assert((data[0] & (1<<15)) == 0); /* XXX index buffers */ + + for (n = 0; n < data[1]; n++) { + int v = data[2] + n; + ErrorF(" [%d:%d] = ", n, v); + indirect_vertex_out(kgem, v); + ErrorF("\n"); + } +} + +static void +state_base_out(uint32_t *data, uint32_t offset, unsigned int index, + char *name) +{ + if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state base address 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state base not updated\n", + name); +} + +static void +state_max_out(uint32_t *data, uint32_t offset, unsigned int index, + char *name) +{ + if (data[index] == 1) + kgem_debug_print(data, offset, index, + "%s state upper bound disabled\n", name); + else if (data[index] & 1) + kgem_debug_print(data, 
offset, index, + "%s state upper bound 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state upper bound not updated\n", + name); +} + +static const char * +get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static const char * +get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: return "s8_z24float"; + case 1: return "z32float"; + case 2: return "z24s8"; + case 5: return "z16"; + default: return "unknown"; + } +} + +static const char * +get_965_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_965_prim_type(uint32_t data) +{ + uint32_t primtype = (data >> 10) & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; + case 0x12: return "line strip cont"; + case 0x13: return "line strip 
bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + +#if 0 +struct reloc { + struct kgem_bo *bo; + void *base; +}; + +static void * +get_reloc(struct kgem *kgem, + void *base, const uint32_t *reloc, + struct reloc *r) +{ + uint32_t delta = *reloc; + + memset(r, 0, sizeof(*r)); + + if (base == 0) { + uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch); + struct kgem_bo *bo = NULL; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == handle) + break; + assert(i < kgem->nreloc); + handle = kgem->reloc[i].target_handle; + delta = kgem->reloc[i].delta; + + if (handle == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map(kgem, bo, PROT_READ); + r->bo = bo; + r->base = base; + } + } + + return (char *)base + delta; +} + +static void +put_reloc(struct kgem *kgem, struct reloc *r) +{ + if (r->bo != NULL) + munmap(r->base, r->bo->size); +} +#endif + +int kgem_gen4_decode_3d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x6000, 3, 3, "URB_FENCE" }, + { 0x6001, 2, 2, "CS_URB_FENCE" }, + { 0x6002, 2, 2, "CONSTANT_BUFFER" }, + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 5, 5, 
"3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" }, + { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" }, + { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x7b00, 6, 6, "3DPRIMITIVE" }, + { 0x7805, 3, 3, "3DSTATE_URB" }, + { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" }, + { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" }, + { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" }, + { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" }, + }; + uint32_t *data = kgem->batch + offset; + uint32_t op; + unsigned int len; + int i; + char *desc1 = NULL; + + len = (data[0] & 0xff) + 2; + op = (data[0] & 0xffff0000) >> 16; + switch (op) { + case 0x6000: + assert(len == 3); + + kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n", + (data[0] >> 13) & 1 ? "cs " : "", + (data[0] >> 12) & 1 ? "vfe " : "", + (data[0] >> 11) & 1 ? "sf " : "", + (data[0] >> 10) & 1 ? "clip " : "", + (data[0] >> 9) & 1 ? "gs " : "", + (data[0] >> 8) & 1 ? "vs " : ""); + kgem_debug_print(data, offset, 1, + "vs fence: %d, gs_fence: %d, clip_fence: %d\n", + data[1] & 0x3ff, + (data[1] >> 10) & 0x3ff, + (data[1] >> 20) & 0x3ff); + kgem_debug_print(data, offset, 2, + "sf fence: %d, vfe_fence: %d, cs_fence: %d\n", + data[2] & 0x3ff, + (data[2] >> 10) & 0x3ff, + (data[2] >> 20) & 0x7ff); + return len; + + case 0x6001: + kgem_debug_print(data, offset, 0, "CS_URB_STATE\n"); + kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n", + (data[1] >> 4) & 0x1f, + (((data[1] >> 4) & 0x1f) + 1) * 64, + data[1] & 0x7); + return len; + case 0x6002: + kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n", + (data[0] >> 8) & 1 ? 
"valid" : "invalid"); + kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n", + data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64); + return len; + case 0x6101: + i = 0; + kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n"); + assert(len == 6); + + state_base_out(data, offset, i++, "general"); + state_base_out(data, offset, i++, "surface"); + state_base_out(data, offset, i++, "media"); + + state_max_out(data, offset, i++, "general"); + state_max_out(data, offset, i++, "media"); + + return len; + + case 0x7801: + assert(len == 6); + + kgem_debug_print(data, offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + kgem_debug_print(data, offset, 1, "VS binding table\n"); + kgem_debug_print(data, offset, 2, "GS binding table\n"); + kgem_debug_print(data, offset, 3, "CLIP binding table\n"); + kgem_debug_print(data, offset, 4, "SF binding table\n"); + kgem_debug_print(data, offset, 5, "WM binding table\n"); + + return len; + + case 0x7808: + assert((len - 1) % 4 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + gen4_update_vertex_buffer(kgem, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n", + data[i] >> 27, + data[i] & (1 << 20) ? "random" : "sequential", + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i++, "buffer address\n"); + kgem_debug_print(data, offset, i++, "max index\n"); + kgem_debug_print(data, offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + assert((len + 1) % 2 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + memset(state.ve, 0, sizeof(state.ve)); /* XXX? */ + for (i = 1; i < len;) { + gen4_update_vertex_elements(kgem, (i - 1)/2, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> 27, + data[i] & (1 << 26) ? 
"" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_965_element_component(data[i], 0), + get_965_element_component(data[i], 1), + get_965_element_component(data[i], 2), + get_965_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + state.num_ve = (len - 1) / 2; /* XXX? */ + return len; + + case 0x780a: + assert(len == 3); + kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n"); + kgem_debug_print(data, offset, 1, "beginning buffer address\n"); + kgem_debug_print(data, offset, 2, "ending buffer address\n"); + return len; + + case 0x7900: + assert(len == 4); + kgem_debug_print(data, offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "top left: %d,%d\n", + data[1] & 0xffff, + (data[1] >> 16) & 0xffff); + kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n", + data[2] & 0xffff, + (data[2] >> 16) & 0xffff); + kgem_debug_print(data, offset, 3, "origin: %d,%d\n", + (int)data[3] & 0xffff, + ((int)data[3] >> 16) & 0xffff); + return len; + + case 0x7905: + assert(len == 7); + kgem_debug_print(data, offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); + kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, + data[1] & (1 << 27) ? 
"" : "not ", + (data[1] & (1 << 22)) != 0, + (data[1] & (1 << 21)) != 0); + kgem_debug_print(data, offset, 2, "depth offset\n"); + kgem_debug_print(data, offset, 3, "%dx%d\n", + ((data[3] & 0x0007ffc0) >> 6) + 1, + ((data[3] & 0xfff80000) >> 19) + 1); + kgem_debug_print(data, offset, 4, "volume depth\n"); + kgem_debug_print(data, offset, 5, "\n"); + kgem_debug_print(data, offset, 6, "\n"); + return len; + + case 0x7a00: + assert(len == 4 || len == 5); + switch ((data[1] >> 14) & 0x3) { + case 0: desc1 = "no write"; break; + case 1: desc1 = "qword write"; break; + case 2: desc1 = "PS_DEPTH_COUNT write"; break; + case 3: desc1 = "TIMESTAMP write"; break; + } + kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n"); + kgem_debug_print(data, offset, 1, + "%s, %scs stall, %stlb invalidate, " + "%ssync gfdt, %sdepth stall, %sRC write flush, " + "%sinst flush, %sTC flush\n", + desc1, + data[1] & (1 << 20) ? "" : "no ", + data[1] & (1 << 18) ? "" : "no ", + data[1] & (1 << 17) ? "" : "no ", + data[1] & (1 << 13) ? "" : "no ", + data[1] & (1 << 12) ? "" : "no ", + data[1] & (1 << 11) ? "" : "no ", + data[1] & (1 << 10) ? "" : "no "); + if (len == 5) { + kgem_debug_print(data, offset, 2, "destination address\n"); + kgem_debug_print(data, offset, 3, "immediate dword low\n"); + kgem_debug_print(data, offset, 4, "immediate dword high\n"); + } else { + for (i = 2; i < len; i++) { + kgem_debug_print(data, offset, i, "\n"); + } + } + return len; + + case 0x7b00: + assert(len == 6); + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type(data[0]), + (data[0] & (1 << 15)) ? 
"random" : "sequential"); + kgem_debug_print(data, offset, 1, "vertex count\n"); + kgem_debug_print(data, offset, 2, "start vertex\n"); + kgem_debug_print(data, offset, 3, "instance count\n"); + kgem_debug_print(data, offset, 4, "start instance\n"); + kgem_debug_print(data, offset, 5, "index bias\n"); + primitive_out(kgem, data); + return len; + } + + /* For the rest, just dump the bytes */ + for (i = 0; i < ARRAY_SIZE(opcodes); i++) + if (op == opcodes[i].opcode) + break; + + assert(i < ARRAY_SIZE(opcodes)); + + len = 1; + kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name); + if (opcodes[i].max_len > 1) { + len = (data[0] & 0xff) + 2; + assert(len >= opcodes[i].min_len && + len <= opcodes[i].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; +} + +static void finish_vertex_buffers(struct kgem *kgem) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(state.vb); i++) + if (state.vb[i].current) + munmap(state.vb[i].base, state.vb[i].current->size); +} + +void kgem_gen4_finish_state(struct kgem *kgem) +{ + finish_vertex_buffers(kgem); + + if (state.dynamic_state.current) + munmap(state.dynamic_state.base, state.dynamic_state.current->size); + + memset(&state, 0, sizeof(state)); +} diff --git a/src/sna/kgem_debug_gen5.c b/src/sna/kgem_debug_gen5.c new file mode 100644 index 00000000..78ba4432 --- /dev/null +++ b/src/sna/kgem_debug_gen5.c @@ -0,0 +1,687 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this 
permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt <eric@anholt.net> + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/mman.h> +#include <assert.h> + +#include "sna.h" +#include "sna_reg.h" + +#include "gen5_render.h" + +#include "kgem_debug.h" + +static struct state { + struct vertex_buffer { + int handle; + void *base; + int size; + const char *ptr; + int pitch; + + struct kgem_bo *current; + } vb[17]; + struct vertex_elements { + int buffer; + int offset; + bool valid; + uint32_t type; + uint8_t swizzle[4]; + } ve[17]; + int num_ve; + + struct dynamic_state { + struct kgem_bo *current; + void *base, *ptr; + } dynamic_state; +} state; + +static void gen5_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) +{ + struct drm_i915_gem_relocation_entry *reloc; + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i, size; + + reloc = kgem_debug_get_reloc_entry(kgem, &data[1] - kgem->batch); + if (reloc->target_handle == 0) { + base = kgem->batch; + size = kgem->nbatch * sizeof(uint32_t); + } else { + bo = kgem_debug_get_bo_for_reloc_entry(kgem, reloc); + base = kgem_bo_map(kgem, bo, PROT_READ); + size = bo->size; + } + ptr = (char *)base + reloc->delta; + + i = data[0] >> 27; + if (state.vb[i].current) + munmap(state.vb[i].base, state.vb[i].current->size); + + state.vb[i].handle = 
reloc->target_handle; + state.vb[i].current = bo; + state.vb[i].base = base; + state.vb[i].ptr = ptr; + state.vb[i].pitch = data[0] & 0x7ff; + state.vb[i].size = size; +} + +static uint32_t +get_ve_component(uint32_t data, int component) +{ + return (data >> (16 + (3 - component) * 4)) & 0x7; +} + +static void gen5_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data) +{ + state.ve[id].buffer = data[0] >> 27; + state.ve[id].valid = !!(data[0] & (1 << 26)); + state.ve[id].type = (data[0] >> 16) & 0x1ff; + state.ve[id].offset = data[0] & 0x7ff; + state.ve[id].swizzle[0] = get_ve_component(data[1], 0); + state.ve[id].swizzle[1] = get_ve_component(data[1], 1); + state.ve[id].swizzle[2] = get_ve_component(data[1], 2); + state.ve[id].swizzle[3] = get_ve_component(data[1], 3); +} + +static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max) +{ + int c, o; + + ErrorF("("); + for (c = o = 0; c < 4 && o < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%d", v[o++]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (o < max) + ErrorF(", "); + } + ErrorF(")"); +} + +static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max) +{ + int c, o; + + ErrorF("("); + for (c = o = 0; c < 4 && o < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%f", f[o++]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (o < max) + ErrorF(", "); + } + ErrorF(")"); +} + +static void ve_out(const struct vertex_elements *ve, const void *ptr) +{ + switch (ve->type) { + case GEN5_SURFACEFORMAT_R32_FLOAT: + vertices_float_out(ve, ptr, 1); + break; + case GEN5_SURFACEFORMAT_R32G32_FLOAT: + vertices_float_out(ve, ptr, 2); + break; + case 
GEN5_SURFACEFORMAT_R32G32B32_FLOAT: + vertices_float_out(ve, ptr, 3); + break; + case GEN5_SURFACEFORMAT_R32G32B32A32_FLOAT: + vertices_float_out(ve, ptr, 4); + break; + case GEN5_SURFACEFORMAT_R16_SINT: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN5_SURFACEFORMAT_R16G16_SINT: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN5_SURFACEFORMAT_R16G16B16A16_SINT: + vertices_sint16_out(ve, ptr, 4); + break; + case GEN5_SURFACEFORMAT_R16_SSCALED: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN5_SURFACEFORMAT_R16G16_SSCALED: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN5_SURFACEFORMAT_R16G16B16A16_SSCALED: + vertices_sint16_out(ve, ptr, 4); + break; + } +} + +static void indirect_vertex_out(struct kgem *kgem, uint32_t v) +{ + int i = 1; + + do { + const struct vertex_elements *ve = &state.ve[i]; + const struct vertex_buffer *vb = &state.vb[ve->buffer]; + const void *ptr = vb->ptr + v * vb->pitch + ve->offset; + + if (!ve->valid) + continue; + + assert(vb->pitch); + assert(ve->offset + v*vb->pitch < vb->size); + + ve_out(ve, ptr); + + while (++i <= state.num_ve && !state.ve[i].valid) + ; + + if (i <= state.num_ve) + ErrorF(", "); + } while (i <= state.num_ve); +} + +static void primitive_out(struct kgem *kgem, uint32_t *data) +{ + int n; + + assert((data[0] & (1<<15)) == 0); /* XXX index buffers */ + + for (n = 0; n < data[1]; n++) { + int v = data[2] + n; + ErrorF(" [%d:%d] = ", n, v); + indirect_vertex_out(kgem, v); + ErrorF("\n"); + } +} + +static void +state_base_out(uint32_t *data, uint32_t offset, unsigned int index, + char *name) +{ + if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state base address 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state base not updated\n", + name); +} + +static void +state_max_out(uint32_t *data, uint32_t offset, unsigned int index, + char *name) +{ + if (data[index] == 1) + kgem_debug_print(data, offset, index, + "%s state upper bound 
disabled\n", name); + else if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state upper bound 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state upper bound not updated\n", + name); +} + +static const char * +get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static const char * +get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: return "s8_z24float"; + case 1: return "z32float"; + case 2: return "z24s8"; + case 5: return "z16"; + default: return "unknown"; + } +} + +static const char * +get_965_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_965_prim_type(uint32_t data) +{ + uint32_t primtype = (data >> 10) & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; 
+ case 0x12: return "line strip cont"; + case 0x13: return "line strip bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + +#if 0 +struct reloc { + struct kgem_bo *bo; + void *base; +}; + +static void * +get_reloc(struct kgem *kgem, + void *base, const uint32_t *reloc, + struct reloc *r) +{ + uint32_t delta = *reloc; + + memset(r, 0, sizeof(*r)); + + if (base == 0) { + uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch); + struct kgem_bo *bo = NULL; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == handle) + break; + assert(i < kgem->nreloc); + handle = kgem->reloc[i].target_handle; + delta = kgem->reloc[i].delta; + + if (handle == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map(kgem, bo, PROT_READ); + r->bo = bo; + r->base = base; + } + } + + return (char *)base + delta; +} + +static void +put_reloc(struct kgem *kgem, struct reloc *r) +{ + if (r->bo != NULL) + munmap(r->base, r->bo->size); +} +#endif + +int kgem_gen5_decode_3d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x6000, 3, 3, "URB_FENCE" }, + { 0x6001, 2, 2, "CS_URB_FENCE" }, + { 0x6002, 2, 2, "CONSTANT_BUFFER" }, + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x7900, 4, 4, 
"3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" }, + { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" }, + { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x7b00, 6, 6, "3DPRIMITIVE" }, + { 0x7805, 3, 3, "3DSTATE_URB" }, + { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" }, + { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" }, + { 0x7817, 5, 5, "3DSTATE_CONSTANT_PS_STATE" }, + { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" }, + }; + uint32_t *data = kgem->batch + offset; + uint32_t op; + unsigned int len; + int i; + char *desc1 = NULL; + + len = (data[0] & 0xff) + 2; + op = (data[0] & 0xffff0000) >> 16; + switch (op) { + case 0x6000: + assert(len == 3); + + kgem_debug_print(data, offset, 0, "URB_FENCE: %s%s%s%s%s%s\n", + (data[0] >> 13) & 1 ? "cs " : "", + (data[0] >> 12) & 1 ? "vfe " : "", + (data[0] >> 11) & 1 ? "sf " : "", + (data[0] >> 10) & 1 ? "clip " : "", + (data[0] >> 9) & 1 ? "gs " : "", + (data[0] >> 8) & 1 ? 
"vs " : ""); + kgem_debug_print(data, offset, 1, + "vs fence: %d, gs_fence: %d, clip_fence: %d\n", + data[1] & 0x3ff, + (data[1] >> 10) & 0x3ff, + (data[1] >> 20) & 0x3ff); + kgem_debug_print(data, offset, 2, + "sf fence: %d, vfe_fence: %d, cs_fence: %d\n", + data[2] & 0x3ff, + (data[2] >> 10) & 0x3ff, + (data[2] >> 20) & 0x7ff); + return len; + + case 0x6001: + kgem_debug_print(data, offset, 0, "CS_URB_STATE\n"); + kgem_debug_print(data, offset, 1, "entry_size: %d [%d bytes], n_entries: %d\n", + (data[1] >> 4) & 0x1f, + (((data[1] >> 4) & 0x1f) + 1) * 64, + data[1] & 0x7); + return len; + case 0x6002: + kgem_debug_print(data, offset, 0, "CONSTANT_BUFFER: %s\n", + (data[0] >> 8) & 1 ? "valid" : "invalid"); + kgem_debug_print(data, offset, 1, "offset: 0x%08x, length: %d bytes\n", + data[1] & ~0x3f, ((data[1] & 0x3f) + 1) * 64); + return len; + case 0x6101: + i = 0; + kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n"); + assert(len == 8); + + state_base_out(data, offset, i++, "general"); + state_base_out(data, offset, i++, "surface"); + state_base_out(data, offset, i++, "media"); + state_base_out(data, offset, i++, "instruction"); + + state_max_out(data, offset, i++, "general"); + state_max_out(data, offset, i++, "media"); + state_max_out(data, offset, i++, "instruction"); + + return len; + + case 0x7801: + assert(len == 6); + + kgem_debug_print(data, offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + kgem_debug_print(data, offset, 1, "VS binding table\n"); + kgem_debug_print(data, offset, 2, "GS binding table\n"); + kgem_debug_print(data, offset, 3, "CLIP binding table\n"); + kgem_debug_print(data, offset, 4, "SF binding table\n"); + kgem_debug_print(data, offset, 5, "WM binding table\n"); + + return len; + + case 0x7808: + assert((len - 1) % 4 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + gen5_update_vertex_buffer(kgem, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n", + 
data[i] >> 27, + data[i] & (1 << 20) ? "random" : "sequential", + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i++, "buffer address\n"); + kgem_debug_print(data, offset, i++, "max index\n"); + kgem_debug_print(data, offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + assert((len + 1) % 2 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + memset(state.ve, 0, sizeof(state.ve)); /* XXX? */ + for (i = 1; i < len;) { + gen5_update_vertex_elements(kgem, (i - 1)/2, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> 27, + data[i] & (1 << 26) ? "" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_965_element_component(data[i], 0), + get_965_element_component(data[i], 1), + get_965_element_component(data[i], 2), + get_965_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + state.num_ve = (len - 1) / 2; /* XXX? 
*/ + return len; + + case 0x780a: + assert(len == 3); + kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n"); + kgem_debug_print(data, offset, 1, "beginning buffer address\n"); + kgem_debug_print(data, offset, 2, "ending buffer address\n"); + return len; + + case 0x7900: + assert(len == 4); + kgem_debug_print(data, offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "top left: %d,%d\n", + data[1] & 0xffff, + (data[1] >> 16) & 0xffff); + kgem_debug_print(data, offset, 2, "bottom right: %d,%d\n", + data[2] & 0xffff, + (data[2] >> 16) & 0xffff); + kgem_debug_print(data, offset, 3, "origin: %d,%d\n", + (int)data[3] & 0xffff, + ((int)data[3] >> 16) & 0xffff); + return len; + + case 0x7905: + assert(len == 7); + kgem_debug_print(data, offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); + kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, + data[1] & (1 << 27) ? "" : "not ", + (data[1] & (1 << 22)) != 0, + (data[1] & (1 << 21)) != 0); + kgem_debug_print(data, offset, 2, "depth offset\n"); + kgem_debug_print(data, offset, 3, "%dx%d\n", + ((data[3] & 0x0007ffc0) >> 6) + 1, + ((data[3] & 0xfff80000) >> 19) + 1); + kgem_debug_print(data, offset, 4, "volume depth\n"); + kgem_debug_print(data, offset, 5, "\n"); + kgem_debug_print(data, offset, 6, "\n"); + return len; + + case 0x7a00: + assert(len == 4 || len == 5); + switch ((data[1] >> 14) & 0x3) { + case 0: desc1 = "no write"; break; + case 1: desc1 = "qword write"; break; + case 2: desc1 = "PS_DEPTH_COUNT write"; break; + case 3: desc1 = "TIMESTAMP write"; break; + } + kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n"); + kgem_debug_print(data, offset, 1, + "%s, %scs stall, %stlb invalidate, " + "%ssync gfdt, %sdepth stall, %sRC write flush, " + "%sinst flush, %sTC flush\n", + desc1, + data[1] & (1 << 20) ? 
"" : "no ", + data[1] & (1 << 18) ? "" : "no ", + data[1] & (1 << 17) ? "" : "no ", + data[1] & (1 << 13) ? "" : "no ", + data[1] & (1 << 12) ? "" : "no ", + data[1] & (1 << 11) ? "" : "no ", + data[1] & (1 << 10) ? "" : "no "); + if (len == 5) { + kgem_debug_print(data, offset, 2, "destination address\n"); + kgem_debug_print(data, offset, 3, "immediate dword low\n"); + kgem_debug_print(data, offset, 4, "immediate dword high\n"); + } else { + for (i = 2; i < len; i++) { + kgem_debug_print(data, offset, i, "\n"); + } + } + return len; + + case 0x7b00: + assert(len == 6); + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type(data[0]), + (data[0] & (1 << 15)) ? "random" : "sequential"); + kgem_debug_print(data, offset, 1, "vertex count\n"); + kgem_debug_print(data, offset, 2, "start vertex\n"); + kgem_debug_print(data, offset, 3, "instance count\n"); + kgem_debug_print(data, offset, 4, "start instance\n"); + kgem_debug_print(data, offset, 5, "index bias\n"); + primitive_out(kgem, data); + return len; + } + + /* For the rest, just dump the bytes */ + for (i = 0; i < ARRAY_SIZE(opcodes); i++) + if (op == opcodes[i].opcode) + break; + + assert(i < ARRAY_SIZE(opcodes)); + + len = 1; + kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name); + if (opcodes[i].max_len > 1) { + len = (data[0] & 0xff) + 2; + assert(len >= opcodes[i].min_len && + len <= opcodes[i].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; +} + +static void finish_vertex_buffers(struct kgem *kgem) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(state.vb); i++) + if (state.vb[i].current) + munmap(state.vb[i].base, state.vb[i].current->size); +} + +void kgem_gen5_finish_state(struct kgem *kgem) +{ + finish_vertex_buffers(kgem); + + if (state.dynamic_state.current) + munmap(state.dynamic_state.base, state.dynamic_state.current->size); + + memset(&state, 0, sizeof(state)); +} diff --git a/src/sna/kgem_debug_gen6.c 
b/src/sna/kgem_debug_gen6.c new file mode 100644 index 00000000..d441b536 --- /dev/null +++ b/src/sna/kgem_debug_gen6.c @@ -0,0 +1,1099 @@ +/* + * Copyright © 2007-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Eric Anholt <eric@anholt.net> + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <sys/mman.h> +#include <assert.h> + +#include "sna.h" +#include "sna_reg.h" +#include "gen6_render.h" + +#include "kgem_debug.h" + +static struct state { + struct vertex_buffer { + int handle; + void *base; + const char *ptr; + int pitch; + + struct kgem_bo *current; + } vb[33]; + struct vertex_elements { + int buffer; + int offset; + bool valid; + uint32_t type; + uint8_t swizzle[4]; + } ve[33]; + int num_ve; + + struct dynamic_state { + struct kgem_bo *current; + void *base, *ptr; + } dynamic_state; +} state; + +static void gen6_update_vertex_buffer(struct kgem *kgem, const uint32_t *data) +{ + uint32_t reloc = sizeof(uint32_t) * (&data[1] - kgem->batch); + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == reloc) + break; + assert(i < kgem->nreloc); + reloc = kgem->reloc[i].target_handle; + + if (reloc == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == reloc) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map(kgem, bo, PROT_READ); + } + ptr = (char *)base + kgem->reloc[i].delta; + + i = data[0] >> 26; + if (state.vb[i].current) + munmap(state.vb[i].base, state.vb[i].current->size); + + state.vb[i].current = bo; + state.vb[i].base = base; + state.vb[i].ptr = ptr; + state.vb[i].pitch = data[0] & 0x7ff; +} + +static void gen6_update_dynamic_buffer(struct kgem *kgem, const uint32_t offset) +{ + uint32_t reloc = sizeof(uint32_t) * offset; + struct kgem_bo *bo = NULL; + void *base, *ptr; + int i; + + if ((kgem->batch[offset] & 1) == 0) + return; + + for (i = 0; i < kgem->nreloc; i++) + if (kgem->reloc[i].offset == reloc) + break; + if(i < kgem->nreloc) { + reloc = kgem->reloc[i].target_handle; + + if (reloc == 0) { + 
base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == reloc) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map(kgem, bo, PROT_READ); + } + ptr = (char *)base + (kgem->reloc[i].delta & ~1); + } else { + bo = NULL; + base = NULL; + ptr = NULL; + } + + if (state.dynamic_state.current) + munmap(state.dynamic_state.base, state.dynamic_state.current->size); + + state.dynamic_state.current = bo; + state.dynamic_state.base = base; + state.dynamic_state.ptr = ptr; +} + +static uint32_t +get_ve_component(uint32_t data, int component) +{ + return (data >> (16 + (3 - component) * 4)) & 0x7; +} + +static void gen6_update_vertex_elements(struct kgem *kgem, int id, const uint32_t *data) +{ + state.ve[id].buffer = data[0] >> 26; + state.ve[id].valid = !!(data[0] & (1 << 25)); + state.ve[id].type = (data[0] >> 16) & 0x1ff; + state.ve[id].offset = data[0] & 0x7ff; + state.ve[id].swizzle[0] = get_ve_component(data[1], 0); + state.ve[id].swizzle[1] = get_ve_component(data[1], 1); + state.ve[id].swizzle[2] = get_ve_component(data[1], 2); + state.ve[id].swizzle[3] = get_ve_component(data[1], 3); +} + +static void gen6_update_sf_state(struct kgem *kgem, uint32_t *data) +{ + state.num_ve = 1 + ((data[1] >> 22) & 0x3f); +} + +static void vertices_sint16_out(const struct vertex_elements *ve, const int16_t *v, int max) +{ + int c; + + ErrorF("("); + for (c = 0; c < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%d", v[c]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if 
(c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void vertices_float_out(const struct vertex_elements *ve, const float *f, int max) +{ + int c, o; + + ErrorF("("); + for (c = o = 0; c < 4 && o < max; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("%f", f[o++]); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + for (; c < 4; c++) { + switch (ve->swizzle[c]) { + case 0: ErrorF("#"); break; + case 1: ErrorF("1.0"); break; + case 2: ErrorF("0.0"); break; + case 3: ErrorF("1.0"); break; + case 4: ErrorF("0x1"); break; + case 5: break; + default: ErrorF("?"); + } + if (c < 3) + ErrorF(", "); + } + ErrorF(")"); +} + +static void ve_out(const struct vertex_elements *ve, const void *ptr) +{ + switch (ve->type) { + case GEN6_SURFACEFORMAT_R32_FLOAT: + vertices_float_out(ve, ptr, 1); + break; + case GEN6_SURFACEFORMAT_R32G32_FLOAT: + vertices_float_out(ve, ptr, 2); + break; + case GEN6_SURFACEFORMAT_R32G32B32_FLOAT: + vertices_float_out(ve, ptr, 3); + break; + case GEN6_SURFACEFORMAT_R32G32B32A32_FLOAT: + vertices_float_out(ve, ptr, 4); + break; + case GEN6_SURFACEFORMAT_R16_SINT: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN6_SURFACEFORMAT_R16G16_SINT: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN6_SURFACEFORMAT_R16G16B16A16_SINT: + vertices_sint16_out(ve, ptr, 4); + break; + case GEN6_SURFACEFORMAT_R16_SSCALED: + vertices_sint16_out(ve, ptr, 1); + break; + case GEN6_SURFACEFORMAT_R16G16_SSCALED: + vertices_sint16_out(ve, ptr, 2); + break; + case GEN6_SURFACEFORMAT_R16G16B16A16_SSCALED: + vertices_sint16_out(ve, ptr, 4); + break; + } +} + +static void indirect_vertex_out(struct kgem *kgem, uint32_t v) +{ + int i = 1; + + do { + const struct vertex_elements *ve = &state.ve[i]; + const struct vertex_buffer *vb = &state.vb[ve->buffer]; + const void *ptr = vb->ptr + v * vb->pitch + 
ve->offset; + + if (!ve->valid) + continue; + + ve_out(ve, ptr); + + while (++i <= state.num_ve && !state.ve[i].valid) + ; + + if (i <= state.num_ve) + ErrorF(", "); + } while (i <= state.num_ve); +} + +static void primitive_out(struct kgem *kgem, uint32_t *data) +{ + int n; + + assert((data[0] & (1<<15)) == 0); /* XXX index buffers */ + + for (n = 0; n < data[1]; n++) { + int v = data[2] + n; + ErrorF(" [%d:%d] = ", n, v); + indirect_vertex_out(kgem, v); + ErrorF("\n"); + } +} + +static void finish_vertex_buffers(struct kgem *kgem) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(state.vb); i++) + if (state.vb[i].current) + munmap(state.vb[i].base, state.vb[i].current->size); +} + +static void finish_state(struct kgem *kgem) +{ + finish_vertex_buffers(kgem); + + if (state.dynamic_state.current) + munmap(state.dynamic_state.base, state.dynamic_state.current->size); + + memset(&state, 0, sizeof(state)); +} + +static void +state_base_out(uint32_t *data, uint32_t offset, unsigned int index, + char *name) +{ + if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state base address 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state base not updated\n", + name); +} + +static void +state_max_out(uint32_t *data, uint32_t offset, unsigned int index, + char *name) +{ + if (data[index] == 1) + kgem_debug_print(data, offset, index, + "%s state upper bound disabled\n", name); + else if (data[index] & 1) + kgem_debug_print(data, offset, index, + "%s state upper bound 0x%08x\n", + name, data[index] & ~1); + else + kgem_debug_print(data, offset, index, + "%s state upper bound not updated\n", + name); +} + +static const char * +get_965_surfacetype(unsigned int surfacetype) +{ + switch (surfacetype) { + case 0: return "1D"; + case 1: return "2D"; + case 2: return "3D"; + case 3: return "CUBE"; + case 4: return "BUFFER"; + case 7: return "NULL"; + default: return "unknown"; + } +} + +static const char * 
+get_965_depthformat(unsigned int depthformat) +{ + switch (depthformat) { + case 0: return "s8_z24float"; + case 1: return "z32float"; + case 2: return "z24s8"; + case 5: return "z16"; + default: return "unknown"; + } +} + +static const char * +get_965_element_component(uint32_t data, int component) +{ + uint32_t component_control = (data >> (16 + (3 - component) * 4)) & 0x7; + + switch (component_control) { + case 0: + return "nostore"; + case 1: + switch (component) { + case 0: return "X"; + case 1: return "Y"; + case 2: return "Z"; + case 3: return "W"; + default: return "fail"; + } + case 2: + return "0.0"; + case 3: + return "1.0"; + case 4: + return "0x1"; + case 5: + return "VID"; + default: + return "fail"; + } +} + +static const char * +get_965_prim_type(uint32_t data) +{ + uint32_t primtype = (data >> 10) & 0x1f; + + switch (primtype) { + case 0x01: return "point list"; + case 0x02: return "line list"; + case 0x03: return "line strip"; + case 0x04: return "tri list"; + case 0x05: return "tri strip"; + case 0x06: return "tri fan"; + case 0x07: return "quad list"; + case 0x08: return "quad strip"; + case 0x09: return "line list adj"; + case 0x0a: return "line strip adj"; + case 0x0b: return "tri list adj"; + case 0x0c: return "tri strip adj"; + case 0x0d: return "tri strip reverse"; + case 0x0e: return "polygon"; + case 0x0f: return "rect list"; + case 0x10: return "line loop"; + case 0x11: return "point list bf"; + case 0x12: return "line strip cont"; + case 0x13: return "line strip bf"; + case 0x14: return "line strip cont bf"; + case 0x15: return "tri fan no stipple"; + default: return "fail"; + } +} + +struct reloc { + struct kgem_bo *bo; + void *base; +}; + +static void * +get_reloc(struct kgem *kgem, + void *base, const uint32_t *reloc, + struct reloc *r) +{ + uint32_t delta = *reloc; + + memset(r, 0, sizeof(*r)); + + if (base == 0) { + uint32_t handle = sizeof(uint32_t) * (reloc - kgem->batch); + struct kgem_bo *bo = NULL; + int i; + + for (i = 0; i 
< kgem->nreloc; i++) + if (kgem->reloc[i].offset == handle) + break; + assert(i < kgem->nreloc); + handle = kgem->reloc[i].target_handle; + delta = kgem->reloc[i].delta; + + if (handle == 0) { + base = kgem->batch; + } else { + list_for_each_entry(bo, &kgem->next_request->buffers, request) + if (bo->handle == handle) + break; + assert(&bo->request != &kgem->next_request->buffers); + base = kgem_bo_map(kgem, bo, PROT_READ); + r->bo = bo; + r->base = base; + } + } + + return (char *)base + (delta & ~3); +} + +static void +put_reloc(struct kgem *kgem, struct reloc *r) +{ + if (r->bo != NULL) + munmap(r->base, r->bo->size); +} + +static const char * +gen6_filter_to_string(uint32_t filter) +{ + switch (filter) { + default: + case GEN6_MAPFILTER_NEAREST: return "nearest"; + case GEN6_MAPFILTER_LINEAR: return "linear"; + } +} + +static const char * +gen6_repeat_to_string(uint32_t repeat) +{ + switch (repeat) { + default: + case GEN6_TEXCOORDMODE_CLAMP_BORDER: return "border"; + case GEN6_TEXCOORDMODE_WRAP: return "wrap"; + case GEN6_TEXCOORDMODE_CLAMP: return "clamp"; + case GEN6_TEXCOORDMODE_MIRROR: return "mirror"; + } +} + +static void +gen6_decode_sampler_state(struct kgem *kgem, const uint32_t *reloc) +{ + const struct gen6_sampler_state *ss; + struct reloc r; + const char *min, *mag; + const char *s_wrap, *t_wrap, *r_wrap; + + ss = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r); + + min = gen6_filter_to_string(ss->ss0.min_filter); + mag = gen6_filter_to_string(ss->ss0.mag_filter); + + s_wrap = gen6_repeat_to_string(ss->ss1.s_wrap_mode); + t_wrap = gen6_repeat_to_string(ss->ss1.t_wrap_mode); + r_wrap = gen6_repeat_to_string(ss->ss1.r_wrap_mode); + + ErrorF(" Sampler 0:\n"); + ErrorF(" filter: min=%s, mag=%s\n", min, mag); + ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap); + + ss++; + min = gen6_filter_to_string(ss->ss0.min_filter); + mag = gen6_filter_to_string(ss->ss0.mag_filter); + + s_wrap = gen6_repeat_to_string(ss->ss1.s_wrap_mode); + t_wrap = 
gen6_repeat_to_string(ss->ss1.t_wrap_mode); + r_wrap = gen6_repeat_to_string(ss->ss1.r_wrap_mode); + + ErrorF(" Sampler 1:\n"); + ErrorF(" filter: min=%s, mag=%s\n", min, mag); + ErrorF(" wrap: s=%s, t=%s, r=%s\n", s_wrap, t_wrap, r_wrap); + + put_reloc(kgem, &r); +} + +static const char * +gen6_blend_factor_to_string(uint32_t v) +{ + switch (v) { +#define C(x) case GEN6_BLENDFACTOR_##x: return #x; + C(ONE); + C(SRC_COLOR); + C(SRC_ALPHA); + C(DST_ALPHA); + C(DST_COLOR); + C(SRC_ALPHA_SATURATE); + C(CONST_COLOR); + C(CONST_ALPHA); + C(SRC1_COLOR); + C(SRC1_ALPHA); + C(ZERO); + C(INV_SRC_COLOR); + C(INV_SRC_ALPHA); + C(INV_DST_ALPHA); + C(INV_DST_COLOR); + C(INV_CONST_COLOR); + C(INV_CONST_ALPHA); + C(INV_SRC1_COLOR); + C(INV_SRC1_ALPHA); +#undef C + default: return "???"; + } +} + +static const char * +gen6_blend_function_to_string(uint32_t v) +{ + switch (v) { +#define C(x) case GEN6_BLENDFUNCTION_##x: return #x; + C(ADD); + C(SUBTRACT); + C(REVERSE_SUBTRACT); + C(MIN); + C(MAX); +#undef C + default: return "???"; + } +} + +static void +gen6_decode_blend(struct kgem *kgem, const uint32_t *reloc) +{ + const struct gen6_blend_state *blend; + struct reloc r; + const char *dst, *src; + const char *func; + + blend = get_reloc(kgem, state.dynamic_state.ptr, reloc, &r); + + dst = gen6_blend_factor_to_string(blend->blend0.dest_blend_factor); + src = gen6_blend_factor_to_string(blend->blend0.source_blend_factor); + func = gen6_blend_function_to_string(blend->blend0.blend_func); + + ErrorF(" Blend (%s): function %s, src=%s, dst=%s\n", + blend->blend0.blend_enable ? 
"enabled" : "disabled", + func, src, dst); + + put_reloc(kgem, &r); +} + +int kgem_gen6_decode_3d(struct kgem *kgem, uint32_t offset) +{ + static const struct { + uint32_t opcode; + int min_len; + int max_len; + const char *name; + } opcodes[] = { + { 0x6101, 6, 6, "STATE_BASE_ADDRESS" }, + { 0x6102, 2, 2 , "STATE_SIP" }, + { 0x6104, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x680b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x6904, 1, 1, "3DSTATE_PIPELINE_SELECT" }, + { 0x7800, 7, 7, "3DSTATE_PIPELINED_POINTERS" }, + { 0x7801, 6, 6, "3DSTATE_BINDING_TABLE_POINTERS" }, + { 0x7808, 5, 257, "3DSTATE_VERTEX_BUFFERS" }, + { 0x7809, 3, 256, "3DSTATE_VERTEX_ELEMENTS" }, + { 0x780a, 3, 3, "3DSTATE_INDEX_BUFFER" }, + { 0x780b, 1, 1, "3DSTATE_VF_STATISTICS" }, + { 0x7900, 4, 4, "3DSTATE_DRAWING_RECTANGLE" }, + { 0x7901, 5, 5, "3DSTATE_CONSTANT_COLOR" }, + { 0x7905, 5, 7, "3DSTATE_DEPTH_BUFFER" }, + { 0x7906, 2, 2, "3DSTATE_POLY_STIPPLE_OFFSET" }, + { 0x7907, 33, 33, "3DSTATE_POLY_STIPPLE_PATTERN" }, + { 0x7908, 3, 3, "3DSTATE_LINE_STIPPLE" }, + { 0x7909, 2, 2, "3DSTATE_GLOBAL_DEPTH_OFFSET_CLAMP" }, + { 0x7909, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x790a, 3, 3, "3DSTATE_AA_LINE_PARAMETERS" }, + { 0x790b, 4, 4, "3DSTATE_GS_SVB_INDEX" }, + { 0x790d, 3, 3, "3DSTATE_MULTISAMPLE" }, + { 0x7910, 2, 2, "3DSTATE_CLEAR_PARAMS" }, + { 0x7b00, 6, 6, "3DPRIMITIVE" }, + { 0x7802, 4, 4, "3DSTATE_SAMPLER_STATE_POINTERS" }, + { 0x7805, 3, 3, "3DSTATE_URB" }, + { 0x780d, 4, 4, "3DSTATE_VIEWPORT_STATE_POINTERS" }, + { 0x780e, 4, 4, "3DSTATE_CC_STATE_POINTERS" }, + { 0x780f, 2, 2, "3DSTATE_SCISSOR_STATE_POINTERS" }, + { 0x7810, 6, 6, "3DSTATE_VS_STATE" }, + { 0x7811, 7, 7, "3DSTATE_GS_STATE" }, + { 0x7812, 4, 4, "3DSTATE_CLIP_STATE" }, + { 0x7813, 20, 20, "3DSTATE_SF_STATE" }, + { 0x7814, 9, 9, "3DSTATE_WM_STATE" }, + { 0x7815, 5, 5, "3DSTATE_CONSTANT_VS_STATE" }, + { 0x7816, 5, 5, "3DSTATE_CONSTANT_GS_STATE" }, + { 0x7817, 5, 5, "3DSTATE_CONSTANT_WM_STATE" }, + { 0x7818, 2, 2, "3DSTATE_SAMPLE_MASK" }, 
+ }; + uint32_t *data = kgem->batch + offset; + uint32_t op; + unsigned int len; + int i, j; + char *desc1 = NULL; + + len = (data[0] & 0xff) + 2; + op = (data[0] & 0xffff0000) >> 16; + switch (op) { + case 0x6101: + i = 0; + kgem_debug_print(data, offset, i++, "STATE_BASE_ADDRESS\n"); + if (kgem->gen >= 60) { + assert(len == 10); + + state_base_out(data, offset, i++, "general"); + state_base_out(data, offset, i++, "surface"); + state_base_out(data, offset, i++, "dynamic"); + state_base_out(data, offset, i++, "indirect"); + state_base_out(data, offset, i++, "instruction"); + + state_max_out(data, offset, i++, "general"); + state_max_out(data, offset, i++, "dynamic"); + state_max_out(data, offset, i++, "indirect"); + state_max_out(data, offset, i++, "instruction"); + + gen6_update_dynamic_buffer(kgem, offset + 3); + } else if (kgem->gen >= 50) { + assert(len == 8); + + state_base_out(data, offset, i++, "general"); + state_base_out(data, offset, i++, "surface"); + state_base_out(data, offset, i++, "media"); + state_base_out(data, offset, i++, "instruction"); + + state_max_out(data, offset, i++, "general"); + state_max_out(data, offset, i++, "media"); + state_max_out(data, offset, i++, "instruction"); + } + + return len; + + case 0x7801: + if (kgem->gen >= 60) { + assert(len == 4); + + kgem_debug_print(data, offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS: VS mod %d, " + "GS mod %d, WM mod %d\n", + (data[0] & (1 << 8)) != 0, + (data[0] & (1 << 9)) != 0, + (data[0] & (1 << 12)) != 0); + kgem_debug_print(data, offset, 1, "VS binding table\n"); + kgem_debug_print(data, offset, 2, "GS binding table\n"); + kgem_debug_print(data, offset, 3, "WM binding table\n"); + } else if (kgem->gen >= 40) { + assert(len == 6); + + kgem_debug_print(data, offset, 0, + "3DSTATE_BINDING_TABLE_POINTERS\n"); + kgem_debug_print(data, offset, 1, "VS binding table\n"); + kgem_debug_print(data, offset, 2, "GS binding table\n"); + kgem_debug_print(data, offset, 3, "CLIP binding table\n"); + 
kgem_debug_print(data, offset, 4, "SF binding table\n"); + kgem_debug_print(data, offset, 5, "WM binding table\n"); + } + + return len; + + case 0x7802: + assert(len == 4); + kgem_debug_print(data, offset, 0, "3DSTATE_SAMPLER_STATE_POINTERS: VS mod %d, " + "GS mod %d, WM mod %d\n", + (data[0] & (1 << 8)) != 0, + (data[0] & (1 << 9)) != 0, + (data[0] & (1 << 12)) != 0); + kgem_debug_print(data, offset, 1, "VS sampler state\n"); + kgem_debug_print(data, offset, 2, "GS sampler state\n"); + kgem_debug_print(data, offset, 3, "WM sampler state\n"); + gen6_decode_sampler_state(kgem, &data[3]); + return len; + + case 0x7808: + assert((len - 1) % 4 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_BUFFERS\n"); + + for (i = 1; i < len;) { + gen6_update_vertex_buffer(kgem, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %s, pitch %db\n", + data[i] >> 26, + data[i] & (1 << 20) ? "random" : "sequential", + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i++, "buffer address\n"); + kgem_debug_print(data, offset, i++, "max index\n"); + kgem_debug_print(data, offset, i++, "mbz\n"); + } + return len; + + case 0x7809: + assert((len + 1) % 2 == 0); + kgem_debug_print(data, offset, 0, "3DSTATE_VERTEX_ELEMENTS\n"); + + for (i = 1; i < len;) { + gen6_update_vertex_elements(kgem, (i - 1)/2, data + i); + + kgem_debug_print(data, offset, i, "buffer %d: %svalid, type 0x%04x, " + "src offset 0x%04x bytes\n", + data[i] >> 26, + data[i] & (1 << 25) ? 
"" : "in", + (data[i] >> 16) & 0x1ff, + data[i] & 0x07ff); + i++; + kgem_debug_print(data, offset, i, "(%s, %s, %s, %s), " + "dst offset 0x%02x bytes\n", + get_965_element_component(data[i], 0), + get_965_element_component(data[i], 1), + get_965_element_component(data[i], 2), + get_965_element_component(data[i], 3), + (data[i] & 0xff) * 4); + i++; + } + return len; + + case 0x780d: + assert(len == 4); + kgem_debug_print(data, offset, 0, "3DSTATE_VIEWPORT_STATE_POINTERS\n"); + kgem_debug_print(data, offset, 1, "clip\n"); + kgem_debug_print(data, offset, 2, "sf\n"); + kgem_debug_print(data, offset, 3, "cc\n"); + return len; + + case 0x780a: + assert(len == 3); + kgem_debug_print(data, offset, 0, "3DSTATE_INDEX_BUFFER\n"); + kgem_debug_print(data, offset, 1, "beginning buffer address\n"); + kgem_debug_print(data, offset, 2, "ending buffer address\n"); + return len; + + case 0x780e: + assert(len == 4); + kgem_debug_print(data, offset, 0, "3DSTATE_CC_STATE_POINTERS\n"); + kgem_debug_print(data, offset, 1, "blend%s\n", + data[1] & 1 ? " update" : ""); + if (data[1] & 1) + gen6_decode_blend(kgem, data+1); + kgem_debug_print(data, offset, 2, "depth+stencil%s\n", + data[2] & 1 ? " update" : ""); + kgem_debug_print(data, offset, 3, "cc%s\n", + data[3] & 1 ? 
" update" : ""); + return len; + + case 0x780f: + assert(len == 2); + kgem_debug_print(data, offset, 0, "3DSTATE_SCISSOR_POINTERS\n"); + kgem_debug_print(data, offset, 1, "scissor rect offset\n"); + return len; + + case 0x7810: + assert(len == 6); + kgem_debug_print(data, offset, 0, "3DSTATE_VS\n"); + kgem_debug_print(data, offset, 1, "kernel pointer\n"); + kgem_debug_print(data, offset, 2, "SPF=%d, VME=%d, Sampler Count %d, " + "Binding table count %d\n", + (data[2] >> 31) & 1, + (data[2] >> 30) & 1, + (data[2] >> 27) & 7, + (data[2] >> 18) & 0xff); + kgem_debug_print(data, offset, 3, "scratch offset\n"); + kgem_debug_print(data, offset, 4, "Dispatch GRF start %d, VUE read length %d, " + "VUE read offset %d\n", + (data[4] >> 20) & 0x1f, + (data[4] >> 11) & 0x3f, + (data[4] >> 4) & 0x3f); + kgem_debug_print(data, offset, 5, "Max Threads %d, Vertex Cache %sable, " + "VS func %sable\n", + ((data[5] >> 25) & 0x7f) + 1, + (data[5] & (1 << 1)) != 0 ? "dis" : "en", + (data[5] & 1) != 0 ? "en" : "dis"); + return len; + + case 0x7811: + assert(len == 7); + kgem_debug_print(data, offset, 0, "3DSTATE_GS\n"); + kgem_debug_print(data, offset, 1, "kernel pointer\n"); + kgem_debug_print(data, offset, 2, "SPF=%d, VME=%d, Sampler Count %d, " + "Binding table count %d\n", + (data[2] >> 31) & 1, + (data[2] >> 30) & 1, + (data[2] >> 27) & 7, + (data[2] >> 18) & 0xff); + kgem_debug_print(data, offset, 3, "scratch offset\n"); + kgem_debug_print(data, offset, 4, "Dispatch GRF start %d, VUE read length %d, " + "VUE read offset %d\n", + (data[4] & 0xf), + (data[4] >> 11) & 0x3f, + (data[4] >> 4) & 0x3f); + kgem_debug_print(data, offset, 5, "Max Threads %d, Rendering %sable\n", + ((data[5] >> 25) & 0x7f) + 1, + (data[5] & (1 << 8)) != 0 ? "en" : "dis"); + kgem_debug_print(data, offset, 6, "Reorder %sable, Discard Adjaceny %sable, " + "GS %sable\n", + (data[6] & (1 << 30)) != 0 ? "en" : "dis", + (data[6] & (1 << 29)) != 0 ? "en" : "dis", + (data[6] & (1 << 15)) != 0 ? 
"en" : "dis"); + return len; + + case 0x7812: + assert(len == 4); + kgem_debug_print(data, offset, 0, "3DSTATE_CLIP\n"); + kgem_debug_print(data, offset, 1, "UserClip distance cull test mask 0x%x\n", + data[1] & 0xff); + kgem_debug_print(data, offset, 2, "Clip %sable, API mode %s, Viewport XY test %sable, " + "Viewport Z test %sable, Guardband test %sable, Clip mode %d, " + "Perspective Divide %sable, Non-Perspective Barycentric %sable, " + "Tri Provoking %d, Line Provoking %d, Trifan Provoking %d\n", + (data[2] & (1 << 31)) != 0 ? "en" : "dis", + (data[2] & (1 << 30)) != 0 ? "D3D" : "OGL", + (data[2] & (1 << 28)) != 0 ? "en" : "dis", + (data[2] & (1 << 27)) != 0 ? "en" : "dis", + (data[2] & (1 << 26)) != 0 ? "en" : "dis", + (data[2] >> 13) & 7, + (data[2] & (1 << 9)) != 0 ? "dis" : "en", + (data[2] & (1 << 8)) != 0 ? "en" : "dis", + (data[2] >> 4) & 3, + (data[2] >> 2) & 3, + (data[2] & 3)); + kgem_debug_print(data, offset, 3, "Min PointWidth %d, Max PointWidth %d, " + "Force Zero RTAIndex %sable, Max VPIndex %d\n", + (data[3] >> 17) & 0x7ff, + (data[3] >> 6) & 0x7ff, + (data[3] & (1 << 5)) != 0 ? "en" : "dis", + (data[3] & 0xf)); + return len; + + case 0x7813: + gen6_update_sf_state(kgem, data); + assert(len == 20); + kgem_debug_print(data, offset, 0, "3DSTATE_SF\n"); + kgem_debug_print(data, offset, 1, "Attrib Out %d, Attrib Swizzle %sable, VUE read length %d, " + "VUE read offset %d\n", + (data[1] >> 22) & 0x3f, + (data[1] & (1 << 21)) != 0 ? "en" : "dis", + (data[1] >> 11) & 0x1f, + (data[1] >> 4) & 0x3f); + kgem_debug_print(data, offset, 2, "Legacy Global DepthBias %sable, FrontFace fill %d, BF fill %d, " + "VP transform %sable, FrontWinding_%s\n", + (data[2] & (1 << 11)) != 0 ? "en" : "dis", + (data[2] >> 5) & 3, + (data[2] >> 3) & 3, + (data[2] & (1 << 1)) != 0 ? "en" : "dis", + (data[2] & 1) != 0 ? "CCW" : "CW"); + kgem_debug_print(data, offset, 3, "AA %sable, CullMode %d, Scissor %sable, Multisample m ode %d\n", + (data[3] & (1 << 31)) != 0 ? 
"en" : "dis", + (data[3] >> 29) & 3, + (data[3] & (1 << 11)) != 0 ? "en" : "dis", + (data[3] >> 8) & 3); + kgem_debug_print(data, offset, 4, "Last Pixel %sable, SubPixel Precision %d, Use PixelWidth %d\n", + (data[4] & (1 << 31)) != 0 ? "en" : "dis", + (data[4] & (1 << 12)) != 0 ? 4 : 8, + (data[4] & (1 << 11)) != 0); + kgem_debug_print(data, offset, 5, "Global Depth Offset Constant %f\n", data[5]); + kgem_debug_print(data, offset, 6, "Global Depth Offset Scale %f\n", data[6]); + kgem_debug_print(data, offset, 7, "Global Depth Offset Clamp %f\n", data[7]); + for (i = 0, j = 0; i < 8; i++, j+=2) + kgem_debug_print(data, offset, i+8, "Attrib %d (Override %s%s%s%s, Const Source %d, Swizzle Select %d, " + "Source %d); Attrib %d (Override %s%s%s%s, Const Source %d, Swizzle Select %d, Source %d)\n", + j+1, + (data[8+i] & (1 << 31)) != 0 ? "W":"", + (data[8+i] & (1 << 30)) != 0 ? "Z":"", + (data[8+i] & (1 << 29)) != 0 ? "Y":"", + (data[8+i] & (1 << 28)) != 0 ? "X":"", + (data[8+i] >> 25) & 3, (data[8+i] >> 22) & 3, + (data[8+i] >> 16) & 0x1f, + j, + (data[8+i] & (1 << 15)) != 0 ? "W":"", + (data[8+i] & (1 << 14)) != 0 ? "Z":"", + (data[8+i] & (1 << 13)) != 0 ? "Y":"", + (data[8+i] & (1 << 12)) != 0 ? 
"X":"", + (data[8+i] >> 9) & 3, (data[8+i] >> 6) & 3, + (data[8+i] & 0x1f)); + kgem_debug_print(data, offset, 16, "Point Sprite TexCoord Enable\n"); + kgem_debug_print(data, offset, 17, "Const Interp Enable\n"); + kgem_debug_print(data, offset, 18, "Attrib 7-0 WrapShortest Enable\n"); + kgem_debug_print(data, offset, 19, "Attrib 15-8 WrapShortest Enable\n"); + + return len; + + case 0x7814: + assert(len == 9); + kgem_debug_print(data, offset, 0, "3DSTATE_WM\n"); + kgem_debug_print(data, offset, 1, "kernel start pointer 0\n"); + kgem_debug_print(data, offset, 2, "SPF=%d, VME=%d, Sampler Count %d, " + "Binding table count %d\n", + (data[2] >> 31) & 1, + (data[2] >> 30) & 1, + (data[2] >> 27) & 7, + (data[2] >> 18) & 0xff); + kgem_debug_print(data, offset, 3, "scratch offset\n"); + kgem_debug_print(data, offset, 4, "Depth Clear %d, Depth Resolve %d, HiZ Resolve %d, " + "Dispatch GRF start[0] %d, start[1] %d, start[2] %d\n", + (data[4] & (1 << 30)) != 0, + (data[4] & (1 << 28)) != 0, + (data[4] & (1 << 27)) != 0, + (data[4] >> 16) & 0x7f, + (data[4] >> 8) & 0x7f, + (data[4] & 0x7f)); + kgem_debug_print(data, offset, 5, "MaxThreads %d, PS KillPixel %d, PS computed Z %d, " + "PS use sourceZ %d, Thread Dispatch %d, PS use sourceW %d, Dispatch32 %d, " + "Dispatch16 %d, Dispatch8 %d\n", + ((data[5] >> 25) & 0x7f) + 1, + (data[5] & (1 << 22)) != 0, + (data[5] & (1 << 21)) != 0, + (data[5] & (1 << 20)) != 0, + (data[5] & (1 << 19)) != 0, + (data[5] & (1 << 8)) != 0, + (data[5] & (1 << 2)) != 0, + (data[5] & (1 << 1)) != 0, + (data[5] & (1 << 0)) != 0); + kgem_debug_print(data, offset, 6, "Num SF output %d, Pos XY offset %d, ZW interp mode %d , " + "Barycentric interp mode 0x%x, Point raster rule %d, Multisample mode %d, " + "Multisample Dispatch mode %d\n", + (data[6] >> 20) & 0x3f, + (data[6] >> 18) & 3, + (data[6] >> 16) & 3, + (data[6] >> 10) & 0x3f, + (data[6] & (1 << 9)) != 0, + (data[6] >> 1) & 3, + (data[6] & 1)); + kgem_debug_print(data, offset, 7, "kernel start 
pointer 1\n"); + kgem_debug_print(data, offset, 8, "kernel start pointer 2\n"); + + return len; + + case 0x7900: + assert(len == 4); + kgem_debug_print(data, offset, 0, + "3DSTATE_DRAWING_RECTANGLE\n"); + kgem_debug_print(data, offset, 1, "top left: %d, %d\n", + (uint16_t)(data[1] & 0xffff), + (uint16_t)(data[1] >> 16)); + kgem_debug_print(data, offset, 2, "bottom right: %d, %d\n", + (uint16_t)(data[2] & 0xffff), + (uint16_t)(data[2] >> 16)); + kgem_debug_print(data, offset, 3, "origin: %d, %d\n", + (int16_t)(data[3] & 0xffff), + (int16_t)(data[3] >> 16)); + return len; + + case 0x7905: + assert(len == 7); + kgem_debug_print(data, offset, 0, + "3DSTATE_DEPTH_BUFFER\n"); + kgem_debug_print(data, offset, 1, "%s, %s, pitch = %d bytes, %stiled, HiZ %d, Seperate Stencil %d\n", + get_965_surfacetype(data[1] >> 29), + get_965_depthformat((data[1] >> 18) & 0x7), + (data[1] & 0x0001ffff) + 1, + data[1] & (1 << 27) ? "" : "not ", + (data[1] & (1 << 22)) != 0, + (data[1] & (1 << 21)) != 0); + kgem_debug_print(data, offset, 2, "depth offset\n"); + kgem_debug_print(data, offset, 3, "%dx%d\n", + ((data[3] & 0x0007ffc0) >> 6) + 1, + ((data[3] & 0xfff80000) >> 19) + 1); + kgem_debug_print(data, offset, 4, "volume depth\n"); + kgem_debug_print(data, offset, 5, "\n"); + kgem_debug_print(data, offset, 6, "\n"); + return len; + + case 0x7a00: + assert(len == 4 || len == 5); + switch ((data[1] >> 14) & 0x3) { + case 0: desc1 = "no write"; break; + case 1: desc1 = "qword write"; break; + case 2: desc1 = "PS_DEPTH_COUNT write"; break; + case 3: desc1 = "TIMESTAMP write"; break; + } + kgem_debug_print(data, offset, 0, "PIPE_CONTROL\n"); + kgem_debug_print(data, offset, 1, + "%s, %scs stall, %stlb invalidate, " + "%ssync gfdt, %sdepth stall, %sRC write flush, " + "%sinst flush, %sTC flush\n", + desc1, + data[1] & (1 << 20) ? "" : "no ", + data[1] & (1 << 18) ? "" : "no ", + data[1] & (1 << 17) ? "" : "no ", + data[1] & (1 << 13) ? "" : "no ", + data[1] & (1 << 12) ? 
"" : "no ", + data[1] & (1 << 11) ? "" : "no ", + data[1] & (1 << 10) ? "" : "no "); + if (len == 5) { + kgem_debug_print(data, offset, 2, "destination address\n"); + kgem_debug_print(data, offset, 3, "immediate dword low\n"); + kgem_debug_print(data, offset, 4, "immediate dword high\n"); + } else { + for (i = 2; i < len; i++) { + kgem_debug_print(data, offset, i, "\n"); + } + } + return len; + + case 0x7b00: + assert(len == 6); + kgem_debug_print(data, offset, 0, + "3DPRIMITIVE: %s %s\n", + get_965_prim_type(data[0]), + (data[0] & (1 << 15)) ? "random" : "sequential"); + kgem_debug_print(data, offset, 1, "vertex count\n"); + kgem_debug_print(data, offset, 2, "start vertex\n"); + kgem_debug_print(data, offset, 3, "instance count\n"); + kgem_debug_print(data, offset, 4, "start instance\n"); + kgem_debug_print(data, offset, 5, "index bias\n"); + primitive_out(kgem, data); + return len; + } + + /* For the rest, just dump the bytes */ + for (i = 0; i < ARRAY_SIZE(opcodes); i++) + if (op == opcodes[i].opcode) + break; + + assert(i < ARRAY_SIZE(opcodes)); + + len = 1; + kgem_debug_print(data, offset, 0, "%s\n", opcodes[i].name); + if (opcodes[i].max_len > 1) { + len = (data[0] & 0xff) + 2; + assert(len >= opcodes[i].min_len && + len <= opcodes[i].max_len); + } + + for (i = 1; i < len; i++) + kgem_debug_print(data, offset, i, "dword %d\n", i); + + return len; +} + +void kgem_gen6_finish_state(struct kgem *kgem) +{ + finish_state(kgem); +} diff --git a/src/sna/sna.h b/src/sna/sna.h new file mode 100644 index 00000000..cb4b61ae --- /dev/null +++ b/src/sna/sna.h @@ -0,0 +1,596 @@ +/************************************************************************** + +Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. +Copyright © 2002 David Dawes + +All Rights Reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +**************************************************************************/ + +/* + * Authors: + * Keith Whitwell <keith@tungstengraphics.com> + * David Dawes <dawes@xfree86.org> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> + +#ifndef _SNA_H_ +#define _SNA_H_ + +#include "xf86_OSproc.h" +#include "compiler.h" +#include "xf86PciInfo.h" +#include "xf86Pci.h" +#include "xf86Cursor.h" +#include "xf86xv.h" +#include "vgaHW.h" +#include "xf86Crtc.h" +#include "xf86RandR12.h" + +#include "xorg-server.h" +#include <pciaccess.h> + +#include "xf86drm.h" +#include "xf86drmMode.h" + +#define _XF86DRI_SERVER_ +#include "dri.h" +#include "dri2.h" +#include "i915_drm.h" + +#if HAVE_UDEV +#include <libudev.h> +#endif + +#define DBG(x) + +#define DEBUG_ALL 0 +#define DEBUG_ACCEL (DEBUG_ALL || 0) +#define DEBUG_BATCH (DEBUG_ALL || 0) +#define DEBUG_BLT (DEBUG_ALL || 0) +#define DEBUG_COMPOSITE (DEBUG_ALL || 0) +#define DEBUG_DAMAGE (DEBUG_ALL || 0) +#define DEBUG_DISPLAY (DEBUG_ALL || 0) +#define DEBUG_DRI (DEBUG_ALL || 0) +#define DEBUG_GRADIENT (DEBUG_ALL || 0) +#define DEBUG_GLYPHS (DEBUG_ALL || 0) +#define DEBUG_IO (DEBUG_ALL || 0) +#define DEBUG_KGEM (DEBUG_ALL || 0) +#define DEBUG_RENDER (DEBUG_ALL || 0) +#define DEBUG_STREAM (DEBUG_ALL || 0) +#define DEBUG_TRAPEZOIDS (DEBUG_ALL || 0) +#define DEBUG_VIDEO (DEBUG_ALL || 0) +#define DEBUG_VIDEO_TEXTURED (DEBUG_ALL || 0) +#define DEBUG_VIDEO_OVERLAY (DEBUG_ALL || 0) + +#define DEBUG_NO_RENDER 0 +#define DEBUG_NO_BLT 0 +#define DEBUG_NO_IO 0 + +#define DEBUG_FLUSH_CACHE 0 +#define DEBUG_FLUSH_BATCH 0 +#define DEBUG_FLUSH_SYNC 0 + +#define TEST_ALL 0 +#define TEST_ACCEL (TEST_ALL || 0) +#define TEST_BATCH (TEST_ALL || 0) +#define TEST_BLT (TEST_ALL || 0) +#define TEST_COMPOSITE (TEST_ALL || 0) +#define TEST_DAMAGE (TEST_ALL || 0) +#define TEST_GRADIENT (TEST_ALL || 0) +#define TEST_GLYPHS (TEST_ALL || 0) +#define TEST_IO (TEST_ALL || 0) +#define TEST_KGEM (TEST_ALL || 0) +#define 
TEST_RENDER (TEST_ALL || 0) + +#include "intel_driver.h" +#include "kgem.h" +#include "sna_damage.h" +#include "sna_render.h" + +static inline void list_add_tail(struct list *new, struct list *head) +{ + __list_add(new, head->prev, head); +} + +enum DRI2FrameEventType { + DRI2_SWAP, + DRI2_ASYNC_SWAP, + DRI2_FLIP, + DRI2_WAITMSC, +}; + +#ifndef CREATE_PIXMAP_USAGE_SCRATCH_HEADER +#define CREATE_PIXMAP_USAGE_SCRATCH_HEADER -1 +#endif + +typedef struct _DRI2FrameEvent { + XID drawable_id; + XID client_id; /* fake client ID to track client destruction */ + ClientPtr client; + enum DRI2FrameEventType type; + int frame; + int pipe; + + /* for swaps & flips only */ + DRI2SwapEventPtr event_complete; + void *event_data; + DRI2BufferPtr front; + DRI2BufferPtr back; +} DRI2FrameEventRec, *DRI2FrameEventPtr; + +#define SNA_CURSOR_X 64 +#define SNA_CURSOR_Y SNA_CURSOR_X + +struct sna_pixmap { + PixmapPtr pixmap; + struct kgem_bo *gpu_bo, *cpu_bo; + struct sna_damage *gpu_damage, *cpu_damage; + + struct list list; + +#define SOURCE_BIAS 4 + uint16_t source_count; + uint8_t mapped :1; + uint8_t pinned :1; + uint8_t gpu_only :1; + uint8_t flush :1; +}; + +static inline PixmapPtr get_drawable_pixmap(DrawablePtr drawable) +{ + ScreenPtr screen = drawable->pScreen; + + if (drawable->type == DRAWABLE_PIXMAP) + return (PixmapPtr)drawable; + else + return screen->GetWindowPixmap((WindowPtr)drawable); +} + +extern DevPrivateKeyRec sna_pixmap_index; + +static inline struct sna_pixmap *sna_pixmap(PixmapPtr pixmap) +{ + return dixGetPrivate(&pixmap->devPrivates, &sna_pixmap_index); +} + +static inline struct sna_pixmap *sna_pixmap_from_drawable(DrawablePtr drawable) +{ + return sna_pixmap(get_drawable_pixmap(drawable)); +} + +static inline void sna_set_pixmap(PixmapPtr pixmap, struct sna_pixmap *sna) +{ + dixSetPrivate(&pixmap->devPrivates, &sna_pixmap_index, sna); +} + +enum { + OPTION_TILING_FB, + OPTION_TILING_2D, + OPTION_PREFER_OVERLAY, + OPTION_COLOR_KEY, + OPTION_VIDEO_KEY, + 
OPTION_SWAPBUFFERS_WAIT, + OPTION_HOTPLUG, + OPTION_THROTTLE, + OPTION_RELAXED_FENCING, + OPTION_VMAP, +}; + +enum { + FLUSH_TIMER = 0, + EXPIRE_TIMER, + NUM_TIMERS +}; + +struct sna { + ScrnInfoPtr scrn; + + unsigned flags; +#define SNA_NO_THROTTLE 0x1 +#define SNA_SWAP_WAIT 0x2 + + int timer[NUM_TIMERS]; + int timer_active; + + struct list deferred_free; + struct list dirty_pixmaps; + + PixmapPtr front, shadow; + + struct sna_mode { + uint32_t fb_id; + drmModeResPtr mode_res; + int cpp; + + drmEventContext event_context; + DRI2FrameEventPtr flip_info; + int flip_count; + int flip_pending[2]; + unsigned int fe_frame; + unsigned int fe_tv_sec; + unsigned int fe_tv_usec; + + struct list outputs; + struct list crtcs; + } mode; + + unsigned int tiling; +#define SNA_TILING_FB 0x1 +#define SNA_TILING_2D 0x2 +#define SNA_TILING_3D 0x4 +#define SNA_TILING_ALL (~0) + + int Chipset; + EntityInfoPtr pEnt; + struct pci_device *PciInfo; + struct intel_chipset chipset; + + ScreenBlockHandlerProcPtr BlockHandler; + ScreenWakeupHandlerProcPtr WakeupHandler; + CloseScreenProcPtr CloseScreen; + + union { + struct gen2_render_state gen2; + struct gen3_render_state gen3; + struct gen4_render_state gen4; + struct gen5_render_state gen5; + struct gen6_render_state gen6; + } render_state; + uint32_t have_render; + + Bool directRenderingOpen; + char *deviceName; + + /* Broken-out options. 
*/ + OptionInfoPtr Options; + + /* Driver phase/state information */ + Bool suspended; + +#if HAVE_UDEV + struct udev_monitor *uevent_monitor; + InputHandlerProc uevent_handler; +#endif + + struct kgem kgem; + struct sna_render render; +}; + +Bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna); +extern void sna_mode_init(struct sna *sna); +extern void sna_mode_remove_fb(struct sna *sna); +extern void sna_mode_fini(struct sna *sna); + +extern int sna_crtc_id(xf86CrtcPtr crtc); +extern int sna_output_dpms_status(xf86OutputPtr output); + +extern Bool sna_do_pageflip(struct sna *sna, + PixmapPtr pixmap, + DRI2FrameEventPtr flip_info, int ref_crtc_hw_id); + +static inline struct sna * +to_sna(ScrnInfoPtr scrn) +{ + return (struct sna *)(scrn->driverPrivate); +} + +static inline struct sna * +to_sna_from_screen(ScreenPtr screen) +{ + return to_sna(xf86Screens[screen->myNum]); +} + +static inline struct sna * +to_sna_from_drawable(DrawablePtr drawable) +{ + return to_sna_from_screen(drawable->pScreen); +} + +#ifndef ARRAY_SIZE +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#endif +#define ALIGN(i,m) (((i) + (m) - 1) & ~((m) - 1)) +#define MIN(a,b) ((a) < (b) ? 
(a) : (b)) + +extern xf86CrtcPtr sna_covering_crtc(ScrnInfoPtr scrn, BoxPtr box, + xf86CrtcPtr desired, BoxPtr crtc_box_ret); + +extern bool sna_wait_for_scanline(struct sna *sna, PixmapPtr pixmap, + xf86CrtcPtr crtc, RegionPtr clip); + +Bool sna_dri2_open(struct sna *sna, ScreenPtr pScreen); +void sna_dri2_close(struct sna *sna, ScreenPtr pScreen); +void sna_dri2_frame_event(unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, DRI2FrameEventPtr flip_info); +void sna_dri2_flip_event(unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, DRI2FrameEventPtr flip_info); + +extern Bool sna_crtc_on(xf86CrtcPtr crtc); +int sna_crtc_to_pipe(xf86CrtcPtr crtc); + +/* sna_render.c */ +void sna_kgem_reset(struct kgem *kgem); +void sna_kgem_flush(struct kgem *kgem); +void sna_kgem_context_switch(struct kgem *kgem, int new_mode); + +CARD32 sna_format_for_depth(int depth); + +void sna_debug_flush(struct sna *sna); + +static inline void +get_drawable_deltas(DrawablePtr drawable, PixmapPtr pixmap, int16_t *x, int16_t *y) +{ +#ifdef COMPOSITE + if (drawable->type == DRAWABLE_WINDOW) { + *x = -pixmap->screen_x; + *y = -pixmap->screen_y; + return; + } +#endif + *x = *y = 0; +} + +static inline int +get_drawable_dx(DrawablePtr drawable) +{ +#ifdef COMPOSITE + if (drawable->type == DRAWABLE_WINDOW) + return -get_drawable_pixmap(drawable)->screen_x; +#endif + return 0; +} + +static inline int +get_drawable_dy(DrawablePtr drawable) +{ +#ifdef COMPOSITE + if (drawable->type == DRAWABLE_WINDOW) + return -get_drawable_pixmap(drawable)->screen_y; +#endif + return 0; +} + +static inline Bool pixmap_is_scanout(PixmapPtr pixmap) +{ + ScreenPtr screen = pixmap->drawable.pScreen; + return pixmap == screen->GetScreenPixmap(screen); +} + +struct sna_pixmap *sna_pixmap_attach(PixmapPtr pixmap); + +PixmapPtr sna_pixmap_create_upload(ScreenPtr screen, + int width, int height, int depth); + +void sna_pixmap_move_to_cpu(PixmapPtr pixmap, bool write); +struct sna_pixmap 
*sna_pixmap_move_to_gpu(PixmapPtr pixmap); +struct sna_pixmap *sna_pixmap_force_to_gpu(PixmapPtr pixmap); + +void +sna_drawable_move_region_to_cpu(DrawablePtr drawable, + RegionPtr region, + Bool write); + +static inline void +sna_drawable_move_to_cpu(DrawablePtr drawable, bool write) +{ + RegionRec region; + + pixman_region_init_rect(®ion, + 0, 0, drawable->width, drawable->height); + sna_drawable_move_region_to_cpu(drawable, ®ion, write); +} + +static inline Bool +sna_drawable_move_to_gpu(DrawablePtr drawable) +{ + return sna_pixmap_move_to_gpu(get_drawable_pixmap(drawable)) != NULL; +} + +static inline Bool +sna_pixmap_is_gpu(PixmapPtr pixmap) +{ + struct sna_pixmap *priv = sna_pixmap(pixmap); + return priv && priv->gpu_bo; +} + +static inline struct kgem_bo *sna_pixmap_get_bo(PixmapPtr pixmap) +{ + return sna_pixmap(pixmap)->gpu_bo; +} + +static inline struct kgem_bo *sna_pixmap_pin(PixmapPtr pixmap) +{ + struct sna_pixmap *priv; + + priv = sna_pixmap_force_to_gpu(pixmap); + if (!priv) + return NULL; + + priv->pinned = 1; + return priv->gpu_bo; +} + + +static inline Bool +_sna_transform_point(const PictTransform *transform, + int64_t x, int64_t y, int64_t result[3]) +{ + int j; + + for (j = 0; j < 3; j++) + result[j] = (transform->matrix[j][0] * x + + transform->matrix[j][1] * y + + transform->matrix[j][2]); + + return result[2] != 0; +} + +static inline void +_sna_get_transformed_coordinates(int x, int y, + const PictTransform *transform, + float *x_out, float *y_out) +{ + + int64_t result[3]; + + _sna_transform_point(transform, x, y, result); + *x_out = result[0] / (double)result[2]; + *y_out = result[1] / (double)result[2]; +} + +void +sna_get_transformed_coordinates(int x, int y, + const PictTransform *transform, + float *x_out, float *y_out); + +Bool +sna_get_transformed_coordinates_3d(int x, int y, + const PictTransform *transform, + float *x_out, float *y_out, float *z_out); + +Bool sna_transform_is_affine(const PictTransform *t); +Bool 
sna_transform_is_integer_translation(const PictTransform *t, + int16_t *tx, int16_t *ty); +Bool sna_transform_is_translation(const PictTransform *t, + pixman_fixed_t *tx, pixman_fixed_t *ty); + + +static inline uint32_t pixmap_size(PixmapPtr pixmap) +{ + return (pixmap->drawable.height - 1) * pixmap->devKind + + pixmap->drawable.width * pixmap->drawable.bitsPerPixel/8; +} + +static inline struct kgem_bo *pixmap_vmap(struct kgem *kgem, PixmapPtr pixmap) +{ + struct sna_pixmap *priv; + + if (kgem->wedged) + return NULL; + + priv = sna_pixmap_attach(pixmap); + if (priv == NULL) + return NULL; + + if (priv->cpu_bo == NULL) { + priv->cpu_bo = kgem_create_map(kgem, + pixmap->devPrivate.ptr, + pixmap_size(pixmap), + 0); + if (priv->cpu_bo) + priv->cpu_bo->pitch = pixmap->devKind; + } + + return priv->cpu_bo; +} + +Bool sna_accel_pre_init(struct sna *sna); +Bool sna_accel_init(ScreenPtr sreen, struct sna *sna); +void sna_accel_block_handler(struct sna *sna); +void sna_accel_wakeup_handler(struct sna *sna); +void sna_accel_close(struct sna *sna); +void sna_accel_free(struct sna *sna); + +Bool sna_accel_create(struct sna *sna); +void sna_composite(CARD8 op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + INT16 src_x, INT16 src_y, + INT16 mask_x, INT16 mask_y, + INT16 dst_x, INT16 dst_y, + CARD16 width, CARD16 height); +void sna_composite_rectangles(CARD8 op, + PicturePtr dst, + xRenderColor *color, + int num_rects, + xRectangle *rects); +void sna_composite_trapezoids(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr maskFormat, + INT16 xSrc, INT16 ySrc, + int ntrap, xTrapezoid *traps); + +Bool sna_gradients_create(struct sna *sna); +void sna_gradients_close(struct sna *sna); + +Bool sna_glyphs_init(ScreenPtr screen); +Bool sna_glyphs_create(struct sna *sna); +void sna_glyphs(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr mask, + INT16 xSrc, INT16 ySrc, + int nlist, + GlyphListPtr list, + GlyphPtr *glyphs); +void 
sna_glyph_unrealize(ScreenPtr screen, GlyphPtr glyph); +void sna_glyphs_close(struct sna *sna); + +void sna_read_boxes(struct sna *sna, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n); +void sna_write_boxes(struct sna *sna, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy, + const BoxRec *box, int n); + +struct kgem_bo *sna_replace(struct sna *sna, + struct kgem_bo *bo, + int width, int height, int bpp, + const void *src, int stride); + +Bool +sna_compute_composite_extents(BoxPtr extents, + PicturePtr src, PicturePtr mask, PicturePtr dst, + INT16 src_x, INT16 src_y, + INT16 mask_x, INT16 mask_y, + INT16 dst_x, INT16 dst_y, + CARD16 width, CARD16 height); +Bool +sna_compute_composite_region(RegionPtr region, + PicturePtr src, PicturePtr mask, PicturePtr dst, + INT16 src_x, INT16 src_y, + INT16 mask_x, INT16 mask_y, + INT16 dst_x, INT16 dst_y, + CARD16 width, CARD16 height); + +void +memcpy_blt(const void *src, void *dst, int bpp, + uint16_t src_stride, uint16_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height); + +#define SNA_CREATE_FB 0x10 + +#endif /* _SNA_H */ diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c new file mode 100644 index 00000000..bab2adb5 --- /dev/null +++ b/src/sna/sna_accel.c @@ -0,0 +1,3306 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above 
copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" + +#include <X11/fonts/font.h> +#include <X11/fonts/fontstruct.h> + +#include <xaarop.h> +#include <fb.h> +#include <dixfontstr.h> + +#ifdef RENDER +#include <mipict.h> +#include <fbpict.h> +#endif + +#include <sys/time.h> +#include <sys/mman.h> +#include <unistd.h> + +#if DEBUG_ACCEL +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +DevPrivateKeyRec sna_pixmap_index; + +#define PM_IS_SOLID(_draw, _pm) \ + (((_pm) & FbFullMask((_draw)->depth)) == FbFullMask((_draw)->depth)) + +#if DEBUG_ACCEL +static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function) +{ + if (box->x1 < 0 || box->y1 < 0 || + box->x2 > pixmap->drawable.width || + box->y2 > pixmap->drawable.height) + { + ErrorF("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, + pixmap->drawable.width, + pixmap->drawable.height); + assert(0); + } +} +#define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__) +#else +#define assert_pixmap_contains_box(p, b) +#endif + +static void sna_pixmap_destroy_gpu_bo(struct sna *sna, struct sna_pixmap *priv) +{ + 
kgem_bo_destroy(&sna->kgem, priv->gpu_bo); + priv->gpu_bo = NULL; +} + +static Bool sna_destroy_private(PixmapPtr pixmap, struct sna_pixmap *priv) +{ + struct sna *sna = to_sna_from_drawable(&pixmap->drawable); + + list_del(&priv->list); + + sna_damage_destroy(&priv->gpu_damage); + sna_damage_destroy(&priv->cpu_damage); + + if (priv->mapped) + munmap(pixmap->devPrivate.ptr, priv->gpu_bo->size); + + /* Always release the gpu bo back to the lower levels of caching */ + if (priv->gpu_bo) + kgem_bo_destroy(&sna->kgem, priv->gpu_bo); + + if (priv->cpu_bo) { + if (pixmap->usage_hint != CREATE_PIXMAP_USAGE_SCRATCH_HEADER && + kgem_bo_is_busy(&sna->kgem, priv->cpu_bo)) { + list_add_tail(&priv->list, &sna->deferred_free); + return false; + } + kgem_bo_sync(&sna->kgem, priv->cpu_bo, true); + kgem_bo_destroy(&sna->kgem, priv->cpu_bo); + } + + free(priv); + return TRUE; +} + +static uint32_t sna_pixmap_choose_tiling(PixmapPtr pixmap) +{ + struct sna *sna = to_sna_from_drawable(&pixmap->drawable); + uint32_t tiling, bit; + + /* Use tiling by default, but disable per user request */ + tiling = I915_TILING_X; + bit = pixmap->usage_hint == SNA_CREATE_FB ? 
+ SNA_TILING_FB : SNA_TILING_2D; + if ((sna->tiling && (1 << bit)) == 0) + tiling = I915_TILING_NONE; + + /* Also adjust tiling if it is not supported or likely to + * slow us down, + */ + return kgem_choose_tiling(&sna->kgem, tiling, + pixmap->drawable.width, + pixmap->drawable.height, + pixmap->drawable.bitsPerPixel); +} + +struct sna_pixmap *sna_pixmap_attach(PixmapPtr pixmap) +{ + struct sna *sna; + struct sna_pixmap *priv; + + priv = sna_pixmap(pixmap); + if (priv) + return priv; + + switch (pixmap->usage_hint) { + case CREATE_PIXMAP_USAGE_GLYPH_PICTURE: + return NULL; + } + + sna = to_sna_from_drawable(&pixmap->drawable); + if (!kgem_can_create_2d(&sna->kgem, + pixmap->drawable.width, + pixmap->drawable.height, + pixmap->drawable.bitsPerPixel, + sna_pixmap_choose_tiling(pixmap))) + return NULL; + + priv = calloc(1, sizeof(*priv)); + if (!priv) + return NULL; + + list_init(&priv->list); + priv->pixmap = pixmap; + + sna_set_pixmap(pixmap, priv); + return priv; +} + +static PixmapPtr +sna_pixmap_create_scratch(ScreenPtr screen, + int width, int height, int depth, + uint32_t tiling) +{ + struct sna *sna = to_sna_from_screen(screen); + PixmapPtr pixmap; + struct sna_pixmap *priv; + int bpp = BitsPerPixel(depth); + + DBG(("%s(%d, %d, %d, tiling=%d)\n", __FUNCTION__, + width, height, depth, tiling)); + + if (tiling == I915_TILING_Y && !sna->have_render) + tiling = I915_TILING_X; + + tiling = kgem_choose_tiling(&sna->kgem, tiling, width, height, bpp); + if (!kgem_can_create_2d(&sna->kgem, width, height, bpp, tiling)) + return fbCreatePixmap(screen, width, height, depth, + CREATE_PIXMAP_USAGE_SCRATCH); + + /* you promise never to access this via the cpu... 
*/ + pixmap = fbCreatePixmap(screen, 0, 0, depth, + CREATE_PIXMAP_USAGE_SCRATCH); + if (!pixmap) + return NullPixmap; + + priv = malloc(sizeof(*priv)); + if (!priv) { + fbDestroyPixmap(pixmap); + return NullPixmap; + } + + priv->gpu_bo = kgem_create_2d(&sna->kgem, + width, height, bpp, tiling, + 0); + if (priv->gpu_bo == NULL) { + free(priv); + fbDestroyPixmap(pixmap); + return NullPixmap; + } + + priv->source_count = 0; + priv->cpu_bo = NULL; + priv->cpu_damage = priv->gpu_damage = NULL; + priv->gpu_only = 1; + priv->pinned = 0; + priv->mapped = 0; + list_init(&priv->list); + + priv->pixmap = pixmap; + sna_set_pixmap(pixmap, priv); + + miModifyPixmapHeader(pixmap, + width, height, depth, bpp, + priv->gpu_bo->pitch, NULL); + pixmap->devPrivate.ptr = NULL; + + return pixmap; +} + + +static PixmapPtr sna_create_pixmap(ScreenPtr screen, + int width, int height, int depth, + unsigned int usage) +{ + PixmapPtr pixmap; + + DBG(("%s(%d, %d, %d, usage=%x)\n", __FUNCTION__, + width, height, depth, usage)); + + if (usage == CREATE_PIXMAP_USAGE_SCRATCH && + to_sna_from_screen(screen)->have_render) + return sna_pixmap_create_scratch(screen, + width, height, depth, + I915_TILING_Y); + + /* XXX could use last deferred free? 
*/ + + pixmap = fbCreatePixmap(screen, width, height, depth, usage); + if (pixmap == NullPixmap) + return NullPixmap; + +/* XXX if (pixmap->drawable.devKind * height > 128) */ + sna_pixmap_attach(pixmap); + return pixmap; +} + +static Bool sna_destroy_pixmap(PixmapPtr pixmap) +{ + if (pixmap->refcnt == 1) { + struct sna_pixmap *priv = sna_pixmap(pixmap); + if (priv) { + if (!sna_destroy_private(pixmap, priv)) + return TRUE; + } + } + + return fbDestroyPixmap(pixmap); +} + +static void sna_pixmap_map_to_cpu(struct sna *sna, + PixmapPtr pixmap, + struct sna_pixmap *priv) +{ + DBG(("%s: AWOOGA, AWOOGA!\n", __FUNCTION__)); + + if (priv->mapped == 0) { + ScreenPtr screen = pixmap->drawable.pScreen; + void *ptr; + + ptr = kgem_bo_map(&sna->kgem, + priv->gpu_bo, + PROT_READ | PROT_WRITE); + assert(ptr != NULL); + + screen->ModifyPixmapHeader(pixmap, + pixmap->drawable.width, + pixmap->drawable.height, + pixmap->drawable.depth, + pixmap->drawable.bitsPerPixel, + priv->gpu_bo->pitch, + ptr); + priv->mapped = 1; + } + kgem_bo_submit(&sna->kgem, priv->gpu_bo); +} + +static inline void list_move(struct list *list, struct list *head) +{ + __list_del(list->prev, list->next); + list_add(list, head); +} + +void +sna_pixmap_move_to_cpu(PixmapPtr pixmap, bool write) +{ + struct sna *sna = to_sna_from_drawable(&pixmap->drawable); + struct sna_pixmap *priv; + + DBG(("%s(pixmap=%p, write=%d)\n", __FUNCTION__, pixmap, write)); + + priv = sna_pixmap(pixmap); + if (priv == NULL) { + DBG(("%s: not attached to %p\n", __FUNCTION__, pixmap)); + return; + } + + DBG(("%s: gpu_bo=%p, gpu_damage=%p, gpu_only=%d\n", + __FUNCTION__, priv->gpu_bo, priv->gpu_damage, priv->gpu_only)); + + if (priv->gpu_bo == NULL) { + DBG(("%s: no GPU bo\n", __FUNCTION__)); + goto done; + } + + if (priv->gpu_only) { + sna_pixmap_map_to_cpu(sna, pixmap, priv); + goto done; + } + + if (priv->gpu_damage) { + BoxPtr box; + int n; + + DBG(("%s: flushing GPU damage\n", __FUNCTION__)); + + n = 
sna_damage_get_boxes(priv->gpu_damage, &box); + if (n) { + struct kgem_bo *dst_bo; + Bool ok = FALSE; + + dst_bo = NULL; + if (sna->kgem.gen >= 30) + dst_bo = pixmap_vmap(&sna->kgem, pixmap); + if (dst_bo) + ok = sna->render.copy_boxes(sna, GXcopy, + pixmap, priv->gpu_bo, 0, 0, + pixmap, dst_bo, 0, 0, + box, n); + if (!ok) + sna_read_boxes(sna, + priv->gpu_bo, 0, 0, + pixmap, 0, 0, + box, n); + } + + __sna_damage_destroy(priv->gpu_damage); + priv->gpu_damage = NULL; + } + +done: + if (priv->cpu_bo) { + DBG(("%s: syncing CPU bo\n", __FUNCTION__)); + kgem_bo_sync(&sna->kgem, priv->cpu_bo, write); + } + + if (write) { + DBG(("%s: marking as damaged\n", __FUNCTION__)); + sna_damage_all(&priv->cpu_damage, + pixmap->drawable.width, + pixmap->drawable.height); + + if (priv->gpu_bo && !priv->pinned) + sna_pixmap_destroy_gpu_bo(sna, priv); + + if (priv->flush) + list_move(&priv->list, &sna->dirty_pixmaps); + } +} + +static Bool +region_subsumes_drawable(RegionPtr region, DrawablePtr drawable) +{ + const BoxRec *extents; + + if (REGION_NUM_RECTS(region) != 1) + return false; + + extents = RegionExtents(region); + return extents->x1 <= 0 && extents->y1 <= 0 && + extents->x2 >= drawable->width && + extents->y2 >= drawable->height; +} + +void +sna_drawable_move_region_to_cpu(DrawablePtr drawable, + RegionPtr region, + Bool write) +{ + struct sna *sna = to_sna_from_drawable(drawable); + PixmapPtr pixmap = get_drawable_pixmap(drawable); + struct sna_pixmap *priv; + int16_t dx, dy; + + DBG(("%s(pixmap=%p, [(%d, %d), (%d, %d)], write=%d)\n", + __FUNCTION__, pixmap, + RegionExtents(region)->x1, RegionExtents(region)->y1, + RegionExtents(region)->x2, RegionExtents(region)->y2, + write)); + + priv = sna_pixmap(pixmap); + if (priv == NULL) { + DBG(("%s: not attached to %p\n", __FUNCTION__, pixmap)); + return; + } + + if (priv->gpu_only) { + DBG(("%s: gpu only\n", __FUNCTION__)); + return sna_pixmap_map_to_cpu(sna, pixmap, priv); + } + + get_drawable_deltas(drawable, pixmap, &dx, &dy); 
+ DBG(("%s: delta=(%d, %d)\n", __FUNCTION__, dx, dy)); + if (dx | dy) + RegionTranslate(region, dx, dy); + + if (region_subsumes_drawable(region, &pixmap->drawable)) { + DBG(("%s: region subsumes drawable\n", __FUNCTION__)); + if (dx | dy) + RegionTranslate(region, -dx, -dy); + return sna_pixmap_move_to_cpu(pixmap, write); + } + +#if 0 + pixman_region_intersect_rect(region, region, + 0, 0, + pixmap->drawable.width, + pixmap->drawable.height); +#endif + + if (priv->gpu_bo == NULL) + goto done; + + if (sna_damage_contains_box(priv->gpu_damage, + REGION_EXTENTS(NULL, region))) { + RegionRec want, need, *r; + + DBG(("%s: region intersects gpu damage\n", __FUNCTION__)); + + r = region; + /* expand the region to move 32x32 pixel blocks at a time */ + if (priv->cpu_damage == NULL) { + int n = REGION_NUM_RECTS(region), i; + BoxPtr boxes = REGION_RECTS(region); + BoxPtr blocks = malloc(sizeof(BoxRec) * REGION_NUM_RECTS(region)); + if (blocks) { + for (i = 0; i < n; i++) { + blocks[i].x1 = boxes[i].x1 & ~31; + if (blocks[i].x1 < 0) + blocks[i].x1 = 0; + + blocks[i].x2 = (boxes[i].x2 + 31) & ~31; + if (blocks[i].x2 > pixmap->drawable.width) + blocks[i].x2 = pixmap->drawable.width; + + blocks[i].y1 = boxes[i].y1 & ~31; + if (blocks[i].y1 < 0) + blocks[i].y1 = 0; + + blocks[i].y2 = (boxes[i].y2 + 31) & ~31; + if (blocks[i].y2 > pixmap->drawable.height) + blocks[i].y2 = pixmap->drawable.height; + } + if (pixman_region_init_rects(&want, blocks, i)) + r = &want; + free(blocks); + } + } + + pixman_region_init(&need); + if (sna_damage_intersect(priv->gpu_damage, r, &need)) { + BoxPtr box = REGION_RECTS(&need); + int n = REGION_NUM_RECTS(&need); + struct kgem_bo *dst_bo; + Bool ok = FALSE; + + dst_bo = NULL; + if (sna->kgem.gen >= 30) + dst_bo = pixmap_vmap(&sna->kgem, pixmap); + if (dst_bo) + ok = sna->render.copy_boxes(sna, GXcopy, + pixmap, priv->gpu_bo, 0, 0, + pixmap, dst_bo, 0, 0, + box, n); + if (!ok) + sna_read_boxes(sna, + priv->gpu_bo, 0, 0, + pixmap, 0, 0, + box, n); + + 
sna_damage_subtract(&priv->gpu_damage, + n <= REGION_NUM_RECTS(r) ? &need : r); + RegionUninit(&need); + } + if (r == &want) + pixman_region_fini(&want); + } + +done: + if (priv->cpu_bo) { + DBG(("%s: syncing cpu bo\n", __FUNCTION__)); + kgem_bo_sync(&sna->kgem, priv->cpu_bo, write); + } + + if (write) { + DBG(("%s: applying cpu damage\n", __FUNCTION__)); + assert_pixmap_contains_box(pixmap, RegionExtents(region)); + sna_damage_add(&priv->cpu_damage, region); + if (priv->flush) + list_move(&priv->list, &sna->dirty_pixmaps); + } + + if (dx | dy) + RegionTranslate(region, -dx, -dy); +} + +static inline Bool +_sna_drawable_use_gpu_bo(DrawablePtr drawable, const BoxPtr box) +{ + PixmapPtr pixmap = get_drawable_pixmap(drawable); + struct sna_pixmap *priv = sna_pixmap(pixmap); + BoxRec extents; + int16_t dx, dy; + + if (priv == NULL) + return FALSE; + if (priv->gpu_bo == NULL) + return FALSE; + + if (priv->cpu_damage == NULL) + return TRUE; + + assert(!priv->gpu_only); + get_drawable_deltas(drawable, pixmap, &dx, &dy); + + extents = *box; + extents.x1 += dx; + extents.x2 += dx; + extents.y1 += dy; + extents.y2 += dy; + + return sna_damage_contains_box(priv->cpu_damage, + &extents) == PIXMAN_REGION_OUT; +} + +static inline Bool +sna_drawable_use_gpu_bo(DrawablePtr drawable, const BoxPtr box) +{ + Bool ret = _sna_drawable_use_gpu_bo(drawable, box); + DBG(("%s((%d, %d), (%d, %d)) = %d\n", __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, ret)); + return ret; +} + +static inline Bool +_sna_drawable_use_cpu_bo(DrawablePtr drawable, const BoxPtr box) +{ + PixmapPtr pixmap = get_drawable_pixmap(drawable); + struct sna_pixmap *priv = sna_pixmap(pixmap); + BoxRec extents; + int16_t dx, dy; + + if (priv == NULL) + return FALSE; + if (priv->cpu_bo == NULL) + return FALSE; + + if (priv->gpu_damage == NULL) + return TRUE; + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + + extents = *box; + extents.x1 += dx; + extents.x2 += dx; + extents.y1 += dy; + extents.y2 += dy; + + 
return sna_damage_contains_box(priv->gpu_damage, + &extents) == PIXMAN_REGION_OUT; +} + +static inline Bool +sna_drawable_use_cpu_bo(DrawablePtr drawable, const BoxPtr box) +{ + Bool ret = _sna_drawable_use_cpu_bo(drawable, box); + DBG(("%s((%d, %d), (%d, %d)) = %d\n", __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, ret)); + return ret; +} + +PixmapPtr +sna_pixmap_create_upload(ScreenPtr screen, + int width, int height, int depth) +{ + struct sna *sna = to_sna_from_screen(screen); + PixmapPtr pixmap; + struct sna_pixmap *priv; + int bpp = BitsPerPixel(depth); + int pad = ALIGN(width * bpp / 8, 4); + void *ptr; + + DBG(("%s(%d, %d, %d)\n", __FUNCTION__, width, height, depth)); + assert(width); + assert(height); + if (!sna->have_render || + !kgem_can_create_2d(&sna->kgem, + width, height, bpp, + I915_TILING_NONE)) + return fbCreatePixmap(screen, width, height, depth, + CREATE_PIXMAP_USAGE_SCRATCH); + + pixmap = fbCreatePixmap(screen, 0, 0, depth, + CREATE_PIXMAP_USAGE_SCRATCH); + if (!pixmap) + return NullPixmap; + + priv = malloc(sizeof(*priv)); + if (!priv) { + fbDestroyPixmap(pixmap); + return NullPixmap; + } + + priv->gpu_bo = kgem_create_buffer(&sna->kgem, pad*height, true, &ptr); + if (!priv->gpu_bo) { + free(priv); + fbDestroyPixmap(pixmap); + return NullPixmap; + } + + priv->gpu_bo->pitch = pad; + + priv->source_count = SOURCE_BIAS; + priv->cpu_bo = NULL; + priv->cpu_damage = priv->gpu_damage = NULL; + priv->gpu_only = 0; + priv->pinned = 0; + priv->mapped = 0; + list_init(&priv->list); + + priv->pixmap = pixmap; + sna_set_pixmap(pixmap, priv); + + miModifyPixmapHeader(pixmap, width, height, depth, bpp, pad, ptr); + return pixmap; +} + +struct sna_pixmap * +sna_pixmap_force_to_gpu(PixmapPtr pixmap) +{ + struct sna_pixmap *priv; + + DBG(("%s(pixmap=%p)\n", __FUNCTION__, pixmap)); + + priv = sna_pixmap(pixmap); + if (priv == NULL) { + priv = sna_pixmap_attach(pixmap); + if (priv == NULL) + return NULL; + + DBG(("%s: created priv and marking all cpu 
damaged\n", + __FUNCTION__)); + sna_damage_all(&priv->cpu_damage, + pixmap->drawable.width, + pixmap->drawable.height); + } + + if (priv->gpu_bo == NULL) { + struct sna *sna = to_sna_from_drawable(&pixmap->drawable); + + priv->gpu_bo = kgem_create_2d(&sna->kgem, + pixmap->drawable.width, + pixmap->drawable.height, + pixmap->drawable.bitsPerPixel, + sna_pixmap_choose_tiling(pixmap), + 0); + if (priv->gpu_bo == NULL) + return NULL; + + DBG(("%s: created gpu bo\n", __FUNCTION__)); + } + + if (!sna_pixmap_move_to_gpu(pixmap)) + return NULL; + + return priv; +} + +struct sna_pixmap * +sna_pixmap_move_to_gpu(PixmapPtr pixmap) +{ + struct sna *sna = to_sna_from_drawable(&pixmap->drawable); + struct sna_pixmap *priv; + BoxPtr box; + int n; + + DBG(("%s()\n", __FUNCTION__)); + + priv = sna_pixmap(pixmap); + if (priv == NULL) + return NULL; + + sna_damage_reduce(&priv->cpu_damage); + DBG(("%s: CPU damage? %d\n", __FUNCTION__, priv->cpu_damage != NULL)); + + if (priv->gpu_bo == NULL) { + if (!sna->kgem.wedged) + priv->gpu_bo = + kgem_create_2d(&sna->kgem, + pixmap->drawable.width, + pixmap->drawable.height, + pixmap->drawable.bitsPerPixel, + sna_pixmap_choose_tiling(pixmap), + priv->cpu_damage ? 
CREATE_INACTIVE : 0); + if (priv->gpu_bo == NULL) { + assert(list_is_empty(&priv->list)); + return NULL; + } + } + + if (priv->cpu_damage == NULL) + goto done; + + n = sna_damage_get_boxes(priv->cpu_damage, &box); + if (n) { + struct kgem_bo *src_bo; + Bool ok = FALSE; + + src_bo = pixmap_vmap(&sna->kgem, pixmap); + if (src_bo) + ok = sna->render.copy_boxes(sna, GXcopy, + pixmap, src_bo, 0, 0, + pixmap, priv->gpu_bo, 0, 0, + box, n); + if (!ok) { + if (n == 1 && !priv->pinned && + box->x1 <= 0 && box->y1 <= 0 && + box->x2 >= pixmap->drawable.width && + box->y2 >= pixmap->drawable.height) { + priv->gpu_bo = + sna_replace(sna, + priv->gpu_bo, + pixmap->drawable.width, + pixmap->drawable.height, + pixmap->drawable.bitsPerPixel, + pixmap->devPrivate.ptr, + pixmap->devKind); + } else { + sna_write_boxes(sna, + priv->gpu_bo, 0, 0, + pixmap->devPrivate.ptr, + pixmap->devKind, + pixmap->drawable.bitsPerPixel, + 0, 0, + box, n); + } + } + } + + __sna_damage_destroy(priv->cpu_damage); + priv->cpu_damage = NULL; + +done: + list_del(&priv->list); + return priv; +} + +static void sna_gc_move_to_cpu(GCPtr gc) +{ + DBG(("%s\n", __FUNCTION__)); + + if (gc->stipple) + sna_drawable_move_to_cpu(&gc->stipple->drawable, false); + + if (gc->fillStyle == FillTiled) + sna_drawable_move_to_cpu(&gc->tile.pixmap->drawable, false); +} + +static Bool +sna_put_image_upload_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, + int x, int y, int w, int h, char *bits, int stride) +{ + struct sna *sna = to_sna_from_drawable(drawable); + PixmapPtr pixmap = get_drawable_pixmap(drawable); + struct sna_pixmap *priv = sna_pixmap(pixmap); + struct kgem_bo *src_bo; + Bool ok = FALSE; + BoxPtr box; + int nbox; + int16_t dx, dy; + + box = REGION_RECTS(region); + nbox = REGION_NUM_RECTS(region); + + DBG(("%s: %d x [(%d, %d), (%d, %d)...]\n", + __FUNCTION__, nbox, + box->x1, box->y1, box->x2, box->y2)); + + if (!priv->pinned && nbox == 1 && + box->x1 <= 0 && box->y1 <= 0 && + box->x2 >= 
pixmap->drawable.width && + box->y2 >= pixmap->drawable.height) { + priv->gpu_bo = + sna_replace(sna, priv->gpu_bo, + pixmap->drawable.width, + pixmap->drawable.height, + pixmap->drawable.bitsPerPixel, + bits, stride); + return TRUE; + } + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + x += dx + drawable->x; + y += dy + drawable->y; + + src_bo = kgem_create_map(&sna->kgem, bits, stride*h, 1); + if (src_bo) { + src_bo->pitch = stride; + ok = sna->render.copy_boxes(sna, gc->alu, + pixmap, src_bo, -x, -y, + pixmap, priv->gpu_bo, 0, 0, + box, nbox); + kgem_bo_sync(&sna->kgem, src_bo, true); + kgem_bo_destroy(&sna->kgem, src_bo); + } + + if (!ok && gc->alu == GXcopy) { + sna_write_boxes(sna, + priv->gpu_bo, 0, 0, + bits, + stride, + pixmap->drawable.bitsPerPixel, + -x, -y, + box, nbox); + ok = TRUE; + } + + return ok; +} + +static Bool +sna_put_image_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, + int x, int y, int w, int h, char *bits, int stride) +{ + struct sna *sna = to_sna_from_drawable(drawable); + PixmapPtr pixmap = get_drawable_pixmap(drawable); + struct sna_pixmap *priv = sna_pixmap(pixmap); + int16_t dx, dy; + + if (!priv->gpu_bo) + return false; + + if (priv->gpu_only) + return sna_put_image_upload_blt(drawable, gc, region, + x, y, w, h, bits, stride); + + if (gc->alu != GXcopy) + return false; + + if (priv->cpu_bo) + kgem_bo_sync(&sna->kgem, priv->cpu_bo, true); + + if (region_subsumes_drawable(region, &pixmap->drawable)) { + sna_damage_destroy(&priv->gpu_damage); + sna_damage_all(&priv->cpu_damage, + pixmap->drawable.width, + pixmap->drawable.height); + if (priv->gpu_bo && !priv->pinned) + sna_pixmap_destroy_gpu_bo(sna, priv); + } else { + assert_pixmap_contains_box(pixmap, RegionExtents(region)); + sna_damage_subtract(&priv->gpu_damage, region); + sna_damage_add(&priv->cpu_damage, region); + } + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + dx += drawable->x; + dy += drawable->y; + + DBG(("%s: fbPutZImage(%d[+%d], %d[+%d], %d, %d)\n", 
+ __FUNCTION__, + x+dx, pixmap->drawable.x, + y+dy, pixmap->drawable.y, + w, h)); + fbPutZImage(&pixmap->drawable, region, + GXcopy, ~0U, + x + dx, y + dy, w, h, + (FbStip*)bits, stride/sizeof(FbStip)); + return true; +} + +static void +sna_put_image(DrawablePtr drawable, GCPtr gc, int depth, + int x, int y, int w, int h, int left, int format, + char *bits) +{ + PixmapPtr pixmap = get_drawable_pixmap(drawable); + struct sna_pixmap *priv = sna_pixmap(pixmap); + RegionRec region, *clip; + BoxRec box; + int16_t dx, dy; + + DBG(("%s((%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); + + if (w == 0 || h == 0) + return; + + if (priv == NULL) { + fbPutImage(drawable, gc, depth, x, y, w, h, left, format, bits); + return; + } + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + + box.x1 = x + drawable->x + dx; + box.y1 = y + drawable->y + dy; + box.x2 = box.x1 + w; + box.y2 = box.y1 + h; + + if (box.x1 < 0) + box.x1 = 0; + if (box.y1 < 0) + box.y1 = 0; + if (box.x2 > pixmap->drawable.width) + box.x2 = pixmap->drawable.width; + if (box.y2 > pixmap->drawable.height) + box.y2 = pixmap->drawable.height; + + RegionInit(®ion, &box, 1); + + clip = fbGetCompositeClip(gc); + if (clip) { + RegionTranslate(clip, dx, dy); + RegionIntersect(®ion, ®ion, clip); + RegionTranslate(clip, -dx, -dy); + } + + if (format != ZPixmap || + !PM_IS_SOLID(drawable, gc->planemask) || + !sna_put_image_blt(drawable, gc, ®ion, + x, y, w, h, + bits, PixmapBytePad(w, depth))) { + RegionTranslate(®ion, -dx, -dy); + + sna_drawable_move_region_to_cpu(drawable, ®ion, true); + DBG(("%s: fbPutImage(%d, %d, %d, %d)\n", + __FUNCTION__, x, y, w, h)); + fbPutImage(drawable, gc, depth, x, y, w, h, left, format, bits); + } + + RegionUninit(®ion); +} + +static void +sna_copy_boxes(DrawablePtr src, DrawablePtr dst, GCPtr gc, + BoxPtr box, int n, + int dx, int dy, + Bool reverse, Bool upsidedown, Pixel bitplane, + void *closure) +{ + struct sna *sna = to_sna_from_drawable(dst); + PixmapPtr src_pixmap = 
get_drawable_pixmap(src); + PixmapPtr dst_pixmap = get_drawable_pixmap(dst); + struct sna_pixmap *src_priv = sna_pixmap(src_pixmap); + struct sna_pixmap *dst_priv = sna_pixmap(dst_pixmap); + int alu = gc ? gc->alu : GXcopy; + int16_t src_dx, src_dy; + int16_t dst_dx, dst_dy; + int stride, bpp; + char *bits; + RegionRec region; + Bool replaces; + + if (n == 0) + return; + + DBG(("%s (boxes=%dx[(%d, %d), (%d, %d)...], src=+(%d, %d), alu=%d, src.size=%dx%d, dst.size=%dx%d)\n", + __FUNCTION__, n, + box[0].x1, box[0].y1, box[0].x2, box[0].y2, + dx, dy, alu, + src_pixmap->drawable.width, src_pixmap->drawable.height, + dst_pixmap->drawable.width, dst_pixmap->drawable.height)); + + pixman_region_init_rects(®ion, box, n); + + bpp = dst_pixmap->drawable.bitsPerPixel; + + get_drawable_deltas(dst, dst_pixmap, &dst_dx, &dst_dy); + get_drawable_deltas(src, src_pixmap, &src_dx, &src_dy); + src_dx += dx; + src_dy += dy; + + replaces = alu == GXcopy && n == 1 && + box->x1 + dst_dx <= 0 && + box->y1 + dst_dy <= 0 && + box->x2 + dst_dx >= dst_pixmap->drawable.width && + box->y2 + dst_dy >= dst_pixmap->drawable.height; + + DBG(("%s: dst=(priv=%p, gpu_bo=%p, cpu_bo=%p), src=(priv=%p, gpu_bo=%p, cpu_bo=%p), replaces=%d\n", + __FUNCTION__, + dst_priv, + dst_priv ? dst_priv->gpu_bo : NULL, + dst_priv ? dst_priv->cpu_bo : NULL, + src_priv, + src_priv ? src_priv->gpu_bo : NULL, + src_priv ? 
src_priv->cpu_bo : NULL, + replaces)); + + /* Try to maintain the data on the GPU */ + if (dst_priv && dst_priv->gpu_bo == NULL && + src_priv && src_priv->gpu_bo != NULL && + alu == GXcopy) { + uint32_t tiling = + sna_pixmap_choose_tiling(dst_pixmap); + + DBG(("%s: create dst GPU bo for copy\n", __FUNCTION__)); + + if (!sna->kgem.wedged && + kgem_can_create_2d(&sna->kgem, + dst_pixmap->drawable.width, + dst_pixmap->drawable.height, + dst_pixmap->drawable.bitsPerPixel, + tiling)) + dst_priv->gpu_bo = + kgem_create_2d(&sna->kgem, + dst_pixmap->drawable.width, + dst_pixmap->drawable.height, + dst_pixmap->drawable.bitsPerPixel, + tiling, 0); + } + + if (dst_priv && dst_priv->gpu_bo) { + if (!src_priv && !dst_priv->gpu_only) { + DBG(("%s: fallback - src_priv=%p but dst gpu_only=%d\n", + __FUNCTION__, + src_priv, dst_priv->gpu_only)); + goto fallback; + } + + if (alu != GXcopy && !sna_pixmap_move_to_gpu(dst_pixmap)) { + DBG(("%s: fallback - not a pure copy and failed to move dst to GPU\n", + __FUNCTION__)); + goto fallback; + } + + if (src_priv && src_priv->gpu_bo && + sna_pixmap_move_to_gpu(src_pixmap)) { + if (!sna->render.copy_boxes(sna, alu, + src_pixmap, src_priv->gpu_bo, src_dx, src_dy, + dst_pixmap, dst_priv->gpu_bo, dst_dx, dst_dy, + box, n)) { + DBG(("%s: fallback - accelerated copy boxes failed\n", + __FUNCTION__)); + goto fallback; + } + + if (replaces) { + sna_damage_destroy(&dst_priv->cpu_damage); + sna_damage_all(&dst_priv->gpu_damage, + dst_pixmap->drawable.width, + dst_pixmap->drawable.height); + } else { + RegionTranslate(®ion, dst_dx, dst_dy); + assert_pixmap_contains_box(dst_pixmap, + RegionExtents(®ion)); + sna_damage_add(&dst_priv->gpu_damage, ®ion); + if (alu == GXcopy) + sna_damage_subtract(&dst_priv->cpu_damage, + ®ion); + RegionTranslate(®ion, -dst_dx, -dst_dy); + } + } else { + if (alu != GXcopy) { + DBG(("%s: fallback - not a copy and source is on the CPU\n", + __FUNCTION__)); + goto fallback; + } + + if (src_priv) { + RegionTranslate(®ion, 
src_dx, src_dy); + sna_drawable_move_region_to_cpu(&src_pixmap->drawable, + ®ion, false); + RegionTranslate(®ion, -src_dx, -src_dy); + } + + if (!dst_priv->pinned && replaces) { + stride = src_pixmap->devKind; + bits = src_pixmap->devPrivate.ptr; + bits += src_dy * stride + src_dx * bpp / 8; + + dst_priv->gpu_bo = + sna_replace(sna, + dst_priv->gpu_bo, + dst_pixmap->drawable.width, + dst_pixmap->drawable.height, + bpp, bits, stride); + + sna_damage_destroy(&dst_priv->cpu_damage); + sna_damage_all(&dst_priv->gpu_damage, + dst_pixmap->drawable.width, + dst_pixmap->drawable.height); + } else { + DBG(("%s: dst is on the GPU, src is on the CPU, uploading\n", + __FUNCTION__)); + sna_write_boxes(sna, + dst_priv->gpu_bo, dst_dx, dst_dy, + src_pixmap->devPrivate.ptr, + src_pixmap->devKind, + src_pixmap->drawable.bitsPerPixel, + src_dx, src_dy, + box, n); + + RegionTranslate(®ion, dst_dx, dst_dy); + assert_pixmap_contains_box(dst_pixmap, + RegionExtents(®ion)); + sna_damage_add(&dst_priv->gpu_damage, + ®ion); + sna_damage_subtract(&dst_priv->cpu_damage, + ®ion); + RegionTranslate(®ion, -dst_dx, -dst_dy); + } + } + } else { + FbBits *dst_bits, *src_bits; + int dst_stride, src_stride; + +fallback: + DBG(("%s: fallback -- src=(%d, %d), dst=(%d, %d)\n", + __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy)); + if (src_priv) { + RegionTranslate(®ion, src_dx, src_dy); + sna_drawable_move_region_to_cpu(&src_pixmap->drawable, + ®ion, false); + RegionTranslate(®ion, -src_dx, -src_dy); + } + + RegionTranslate(®ion, dst_dx, dst_dy); + if (dst_priv) { + if (alu == GXcopy) { + if (replaces) { + sna_damage_destroy(&dst_priv->gpu_damage); + sna_damage_all(&dst_priv->cpu_damage, + dst_pixmap->drawable.width, + dst_pixmap->drawable.height); + if (dst_priv->gpu_bo && !dst_priv->pinned) + sna_pixmap_destroy_gpu_bo(sna, dst_priv); + } else { + assert_pixmap_contains_box(dst_pixmap, + RegionExtents(®ion)); + sna_damage_subtract(&dst_priv->gpu_damage, + ®ion); + sna_damage_add(&dst_priv->cpu_damage, + 
®ion); + } + } else + sna_drawable_move_region_to_cpu(&dst_pixmap->drawable, + ®ion, true); + } + + dst_stride = dst_pixmap->devKind; + src_stride = src_pixmap->devKind; + + dst_bits = (FbBits *) + ((char *)dst_pixmap->devPrivate.ptr + + dst_dy * dst_stride + dst_dx * bpp / 8); + src_bits = (FbBits *) + ((char *)src_pixmap->devPrivate.ptr + + src_dy * src_stride + src_dx * bpp / 8); + + if (alu == GXcopy && !reverse && !upsidedown && bpp >= 8) { + do { + DBG(("%s: memcpy_blt(box=(%d, %d), (%d, %d), src=(%d, %d), dst=(%d, %d), pitches=(%d, %d))\n", + __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1, + src_dx, src_dy, + dst_dx, dst_dy, + src_stride, dst_stride)); + memcpy_blt(src_bits, dst_bits, bpp, + src_stride, dst_stride, + box->x1, box->y1, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1); + box++; + } while (--n); + } else { + dst_stride /= sizeof(FbBits); + src_stride /= sizeof(FbBits); + do { + DBG(("%s: fbBlt (%d, %d), (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2, box->y2)); + fbBlt(src_bits + box->y1 * src_stride, + src_stride, + box->x1 * bpp, + + dst_bits + box->y1 * dst_stride, + dst_stride, + box->x1 * bpp, + + (box->x2 - box->x1) * bpp, + (box->y2 - box->y1), + + alu, -1, bpp, + reverse, upsidedown); + box++; + }while (--n); + } + } + RegionUninit(®ion); +} + +static RegionPtr +sna_copy_area(DrawablePtr src, DrawablePtr dst, GCPtr gc, + int src_x, int src_y, + int width, int height, + int dst_x, int dst_y) +{ + struct sna *sna = to_sna_from_drawable(dst); + + DBG(("%s: src=(%d, %d)x(%d, %d) -> dst=(%d, %d)\n", + __FUNCTION__, src_x, src_y, width, height, dst_x, dst_y)); + + if (sna->kgem.wedged || + src->bitsPerPixel != dst->bitsPerPixel || + !PM_IS_SOLID(dst, gc->planemask)) { + BoxRec box; + RegionRec region; + + box.x1 = dst_x + dst->x; + box.y1 = dst_y + dst->y; + box.x2 = box.x1 + width; + box.y2 = box.y1 + height; + RegionInit(®ion, &box, 1); + + if (gc->pCompositeClip) + RegionIntersect(®ion, 
®ion, gc->pCompositeClip);
+
+	sna_drawable_move_region_to_cpu(dst, ®ion, true);
+	RegionTranslate(®ion,
+			src_x - dst_x - dst->x + src->x,
+			src_y - dst_y - dst->y + src->y);
+	sna_drawable_move_region_to_cpu(src, ®ion, false);
+
+	return fbCopyArea(src, dst, gc,
+			  src_x, src_y,
+			  width, height,
+			  dst_x, dst_y);
+	}
+
+	return miDoCopy(src, dst, gc,
+			src_x, src_y,
+			width, height,
+			dst_x, dst_y,
+			sna_copy_boxes, 0, NULL);
+}
+
+/* Clamp @box to the size of drawable @d (origin at 0,0). */
+#define TRIM_BOX(box, d) do { \
+	if (box.x1 < 0) box.x1 = 0; \
+	if (box.x2 > d->width) box.x2 = d->width; \
+	if (box.y1 < 0) box.y1 = 0; \
+	if (box.y2 > d->height) box.y2 = d->height; \
+} while (0)
+
+/* Clip @box against the extents of @gc's composite clip, if any.
+ * NOTE(review): bare "if" with no do/while guard — callers must not use it
+ * as the body of an unbraced if/else. */
+#define CLIP_BOX(box, gc) \
+	if (gc->pCompositeClip) { \
+		BoxPtr extents = &gc->pCompositeClip->extents;\
+		if (box.x1 < extents->x1) box.x1 = extents->x1; \
+		if (box.x2 > extents->x2) box.x2 = extents->x2; \
+		if (box.y1 < extents->y1) box.y1 = extents->y1; \
+		if (box.y2 > extents->y2) box.y2 = extents->y2; \
+	}
+
+/* Translate @box by the drawable's position within its backing pixmap. */
+#define TRANSLATE_BOX(box, d) do { \
+	box.x1 += d->x; \
+	box.x2 += d->x; \
+	box.y1 += d->y; \
+	box.y2 += d->y; \
+} while (0)
+
+/* Clamp to the drawable, move into pixmap space, then clip to the GC. */
+#define TRIM_AND_TRANSLATE_BOX(box, d, gc) do { \
+	TRIM_BOX(box, d); \
+	TRANSLATE_BOX(box, d); \
+	CLIP_BOX(box, gc); \
+} while (0)
+
+/* Grow @box just enough to contain the point (x, y). */
+#define BOX_ADD_PT(box, x, y) do { \
+	if (box.x1 > x) box.x1 = x; \
+	else if (box.x2 < x) box.x2 = x; \
+	if (box.y1 > y) box.y1 = y; \
+	else if (box.y2 < y) box.y2 = y; \
+} while (0)
+
+/* Grow @box just enough to contain the rectangle (x, y, w, h). */
+#define BOX_ADD_RECT(box, x, y, w, h) do { \
+	if (box.x1 > x) box.x1 = x; \
+	else if (box.x2 < x + w) box.x2 = x + w; \
+	if (box.y1 > y) box.y1 = y; \
+	else if (box.y2 < y + h) box.y2 = y + h; \
+} while (0)
+
+/* True if @box encloses no pixels. */
+#define BOX_EMPTY(box) (box.x2 <= box.x1 || box.y2 <= box.y1)
+
+/* Intersect @a with @b in place; returns TRUE if the result is non-empty. */
+static Bool
+box_intersect(BoxPtr a, const BoxPtr b)
+{
+	if (a->x1 < b->x1)
+		a->x1 = b->x1;
+	if (a->x2 > b->x2)
+		a->x2 = b->x2;
+	if (a->y1 < b->y1)
+		a->y1 = b->y1;
+	if (a->y2 > b->y2)
+		a->y2 = b->y2;
+
+	return a->x1 < a->x2 && a->y1 < a->y2;
+}
+
+static Bool 
+sna_fill_init_blt(struct sna_fill_op *fill, + struct sna *sna, + PixmapPtr pixmap, + struct kgem_bo *bo, + uint8_t alu, + uint32_t pixel) +{ + memset(fill, 0, sizeof(*fill)); + return sna->render.fill(sna, alu, pixmap, bo, pixel, fill); +} + +static Bool +sna_copy_init_blt(struct sna_copy_op *copy, + struct sna *sna, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + uint8_t alu) +{ + memset(copy, 0, sizeof(*copy)); + return sna->render.copy(sna, alu, src, src_bo, dst, dst_bo, copy); +} + +static Bool +sna_fill_spans_blt(DrawablePtr drawable, + struct kgem_bo *bo, struct sna_damage **damage, + GCPtr gc, int n, + DDXPointPtr pt, int *width, int sorted) +{ + struct sna *sna = to_sna_from_drawable(drawable); + PixmapPtr pixmap = get_drawable_pixmap(drawable); + struct sna_fill_op fill; + BoxPtr extents, clip; + int nclip; + int16_t dx, dy; + + if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel)) + return false; + + extents = REGION_EXTENTS(gc->screen, gc->pCompositeClip); + DBG(("%s: clip %d x [(%d, %d), (%d, %d)] x %d [(%d, %d)...]\n", + __FUNCTION__, + REGION_NUM_RECTS(gc->pCompositeClip), + extents->x1, extents->y1, extents->x2, extents->y2, + n, pt->x, pt->y)); + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + while (n--) { + int X1 = pt->x; + int y = pt->y; + int X2 = X1 + (int)*width; + + if (!gc->miTranslate) { + X1 += drawable->x; + X2 += drawable->x; + y += drawable->y; + } + + pt++; + width++; + + if (y < extents->y1 || extents->y2 <= y) + continue; + + if (X1 < extents->x1) + X1 = extents->x1; + + if (X2 > extents->x2) + X2 = extents->x2; + + if (X1 >= X2) + continue; + + nclip = REGION_NUM_RECTS(gc->pCompositeClip); + if (nclip == 1) { + X1 += dx; + if (X1 < 0) + X1 = 0; + X2 += dx; + if (X2 > pixmap->drawable.width) + X2 = pixmap->drawable.width; + if (X2 > X1) { + fill.blt(sna, &fill, X1, y+dy, X2-X1, 1); + if (damage) { + BoxRec box; + + box.x1 = X1; + box.x2 = X2; + box.y1 = y + dy; + box.y2 = 
box.y1 + 1; + + assert_pixmap_contains_box(pixmap, &box); + sna_damage_add_box(damage, &box); + } + } + } else { + clip = REGION_RECTS(gc->pCompositeClip); + while (nclip--) { + if (clip->y1 <= y && y < clip->y2) { + int x1 = clip->x1; + int x2 = clip->x2; + + if (x1 < X1) + x1 = X1; + x1 += dx; + if (x1 < 0) + x1 = 0; + if (x2 > X2) + x2 = X2; + x2 += dx; + if (x2 > pixmap->drawable.width) + x2 = pixmap->drawable.width; + + if (x2 > x1) { + fill.blt(sna, &fill, + x1, y + dy, + x2-x1, 1); + if (damage) { + BoxRec box; + + box.x1 = x1; + box.y1 = y + dy; + box.x2 = x2; + box.y2 = box.y1 + 1; + + assert_pixmap_contains_box(pixmap, &box); + sna_damage_add_box(damage, &box); + } + } + } + clip++; + } + } + } + fill.done(sna, &fill); + return TRUE; +} + +static Bool +sna_spans_extents(DrawablePtr drawable, GCPtr gc, + int n, DDXPointPtr pt, int *width, + BoxPtr out) +{ + BoxRec box; + + if (n == 0) + return true; + + box.x1 = pt->x; + box.x2 = box.x1 + *width; + box.y2 = box.y1 = pt->y; + + while (--n) { + pt++; + width++; + if (box.x1 > pt->x) + box.x1 = pt->x; + if (box.x2 < pt->x + *width) + box.x2 = pt->x + *width; + + if (box.y1 > pt->y) + box.y1 = pt->y; + else if (box.y2 < pt->y) + box.y2 = pt->y; + } + box.y2++; + + if (gc) { + if (!gc->miTranslate) + TRANSLATE_BOX(box, drawable); + CLIP_BOX(box, gc); + } + *out = box; + return BOX_EMPTY(box); +} + +static void +sna_fill_spans(DrawablePtr drawable, GCPtr gc, int n, + DDXPointPtr pt, int *width, int sorted) +{ + struct sna *sna = to_sna_from_drawable(drawable); + BoxRec extents; + RegionRec region; + + DBG(("%s(n=%d, pt[0]=(%d, %d)\n", + __FUNCTION__, n, pt[0].x, pt[0].y)); + + if (sna_spans_extents(drawable, gc, n, pt, width, &extents)) + return; + + DBG(("%s: extents (%d, %d), (%d, %d)\n", __FUNCTION__, + extents.x1, extents.y1, extents.x2, extents.y2)); + + if (sna->kgem.wedged) + goto fallback; + + if (gc->fillStyle == FillSolid && + PM_IS_SOLID(drawable, gc->planemask)) { + struct sna_pixmap *priv = 
sna_pixmap_from_drawable(drawable); + + DBG(("%s: trying solid fill [alu=%d, pixel=%08lx] blt paths\n", + __FUNCTION__, gc->alu, gc->fgPixel)); + + if (sna_drawable_use_gpu_bo(drawable, &extents) && + sna_fill_spans_blt(drawable, + priv->gpu_bo, + priv->gpu_only ? NULL : &priv->gpu_damage, + gc, n, pt, width, sorted)) + return; + + if (sna_drawable_use_cpu_bo(drawable, &extents) && + sna_fill_spans_blt(drawable, + priv->cpu_bo, &priv->cpu_damage, + gc, n, pt, width, sorted)) + return; + } + +fallback: + DBG(("%s: fallback\n", __FUNCTION__)); + RegionInit(®ion, &extents, 1); + if (gc->pCompositeClip) + RegionIntersect(®ion, ®ion, gc->pCompositeClip); + if (!RegionNotEmpty(®ion)) + return; + + sna_gc_move_to_cpu(gc); + sna_drawable_move_region_to_cpu(drawable, ®ion, true); + RegionUninit(®ion); + + fbFillSpans(drawable, gc, n, pt, width, sorted); +} + +static void +sna_set_spans(DrawablePtr drawable, GCPtr gc, char *src, + DDXPointPtr pt, int *width, int n, int sorted) +{ + BoxRec extents; + RegionRec region; + + if (sna_spans_extents(drawable, gc, n, pt, width, &extents)) + return; + + DBG(("%s: extents=(%d, %d), (%d, %d)\n", __FUNCTION__, + extents.x1, extents.y1, extents.x2, extents.y2)); + + RegionInit(®ion, &extents, 1); + if (gc->pCompositeClip) + RegionIntersect(®ion, ®ion, gc->pCompositeClip); + if (!RegionNotEmpty(®ion)) + return; + + sna_drawable_move_region_to_cpu(drawable, ®ion, true); + RegionUninit(®ion); + + fbSetSpans(drawable, gc, src, pt, width, n, sorted); +} + +static RegionPtr +sna_copy_plane(DrawablePtr src, DrawablePtr dst, GCPtr gc, + int src_x, int src_y, + int w, int h, + int dst_x, int dst_y, + unsigned long bit) +{ + BoxRec box; + RegionRec region; + + DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d\n", __FUNCTION__, + src_x, src_y, dst_x, dst_y, w, h)); + + box.x1 = dst_x + dst->x; + box.y1 = dst_y + dst->y; + box.x2 = box.x1 + w; + box.y2 = box.y1 + h; + + RegionInit(®ion, &box, 1); + if (gc->pCompositeClip) + RegionIntersect(®ion, 
®ion, gc->pCompositeClip); + + sna_drawable_move_region_to_cpu(dst, ®ion, true); + RegionTranslate(®ion, + src_x - dst_x - dst->x + src->x, + src_y - dst_y - dst->y + src->y); + sna_drawable_move_region_to_cpu(src, ®ion, false); + + return fbCopyPlane(src, dst, gc, src_x, src_y, w, h, dst_x, dst_y, bit); +} + +static Bool +sna_poly_point_blt(DrawablePtr drawable, + struct kgem_bo *bo, + struct sna_damage **damage, + GCPtr gc, int mode, int n, DDXPointPtr pt) +{ + struct sna *sna = to_sna_from_drawable(drawable); + PixmapPtr pixmap = get_drawable_pixmap(drawable); + RegionPtr clip = fbGetCompositeClip(gc); + struct sna_fill_op fill; + DDXPointRec last; + int16_t dx, dy; + + DBG(("%s: alu=%d, pixel=%08lx\n", __FUNCTION__, gc->alu, gc->fgPixel)); + + if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel)) + return FALSE; + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + + last.x = drawable->x; + last.y = drawable->y; + + while (n--) { + int x, y; + + x = pt->x; + y = pt->y; + pt++; + if (mode == CoordModePrevious) { + x += last.x; + y += last.x; + last.x = x; + last.y = y; + } else { + x += drawable->x; + y += drawable->y; + } + + if (RegionContainsPoint(clip, x, y, NULL)) { + fill.blt(sna, &fill, x + dx, y + dy, 1, 1); + if (damage) { + BoxRec box; + + box.x1 = x + dx; + box.y1 = x + dx; + box.x2 = box.x1 + 1; + box.y2 = box.y1 + 1; + + assert_pixmap_contains_box(pixmap, &box); + sna_damage_add_box(damage, &box); + } + } + } + fill.done(sna, &fill); + return TRUE; +} + +static Bool +sna_poly_point_extents(DrawablePtr drawable, GCPtr gc, + int mode, int n, DDXPointPtr pt, BoxPtr out) +{ + BoxRec box; + + if (n == 0) + return true; + + box.x2 = box.x1 = pt->x; + box.y2 = box.y1 = pt->y; + while (--n) { + pt++; + BOX_ADD_PT(box, pt->x, pt->y); + } + box.x2++; + box.y2++; + + TRIM_AND_TRANSLATE_BOX(box, drawable, gc); + *out = box; + return BOX_EMPTY(box); +} + +static void +sna_poly_point(DrawablePtr drawable, GCPtr gc, + int mode, int n, DDXPointPtr 
pt) +{ + struct sna *sna = to_sna_from_drawable(drawable); + BoxRec extents; + RegionRec region; + + DBG(("%s(mode=%d, n=%d, pt[0]=(%d, %d)\n", + __FUNCTION__, mode, n, pt[0].x, pt[0].y)); + + if (sna_poly_point_extents(drawable, gc, mode, n, pt, &extents)) + return; + + DBG(("%s: extents (%d, %d), (%d, %d)\n", __FUNCTION__, + extents.x1, extents.y1, extents.x2, extents.y2)); + + if (sna->kgem.wedged) + goto fallback; + + if (gc->fillStyle == FillSolid && + PM_IS_SOLID(drawable, gc->planemask)) { + struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable); + + DBG(("%s: trying solid fill [%08lx] blt paths\n", + __FUNCTION__, gc->fgPixel)); + + if (sna_drawable_use_gpu_bo(drawable, &extents) && + sna_poly_point_blt(drawable, + priv->gpu_bo, + priv->gpu_only ? NULL : &priv->gpu_damage, + gc, mode, n, pt)) + return; + + if (sna_drawable_use_cpu_bo(drawable, &extents) && + sna_poly_point_blt(drawable, + priv->cpu_bo, + &priv->cpu_damage, + gc, mode, n, pt)) + return; + } + +fallback: + DBG(("%s: fallback\n", __FUNCTION__)); + RegionInit(®ion, &extents, 1); + if (gc->pCompositeClip) + RegionIntersect(®ion, ®ion, gc->pCompositeClip); + if (!RegionNotEmpty(®ion)) + return; + + sna_drawable_move_region_to_cpu(drawable, ®ion, true); + RegionUninit(®ion); + + fbPolyPoint(drawable, gc, mode, n, pt); +} + +static Bool +sna_poly_line_can_blt(int mode, int n, DDXPointPtr pt) +{ + int i; + + if (mode == CoordModePrevious) { + for (i = 1; i < n; i++) { + if (pt[i].x != 0 && pt[i].y != 0) + return FALSE; + } + } else { + for (i = 1; i < n; i++) { + if (pt[i].x != pt[i-1].x && pt[i].y != pt[i-1].y) + return FALSE; + } + } + + return TRUE; +} + +static Bool +sna_poly_line_blt(DrawablePtr drawable, + struct kgem_bo *bo, + struct sna_damage **damage, + GCPtr gc, int mode, int n, DDXPointPtr pt) +{ + struct sna *sna = to_sna_from_drawable(drawable); + PixmapPtr pixmap = get_drawable_pixmap(drawable); + RegionPtr clip = fbGetCompositeClip(gc); + struct sna_fill_op fill; + DDXPointRec 
last; + int16_t dx, dy; + int first; + + DBG(("%s: alu=%d, fg=%08lx\n", __FUNCTION__, gc->alu, gc->fgPixel)); + + if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel)) + return FALSE; + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + + last.x = drawable->x; + last.y = drawable->y; + first = 1; + + while (n--) { + int nclip; + BoxPtr box; + int x, y; + + x = pt->x; + y = pt->y; + pt++; + if (mode == CoordModePrevious) { + x += last.x; + y += last.x; + } else { + x += drawable->x; + y += drawable->y; + } + + if (!first) { + for (nclip = REGION_NUM_RECTS(clip), box = REGION_RECTS(clip); nclip--; box++) { + BoxRec r; + + if (last.x == x) { + r.x1 = last.x; + r.x2 = last.x + 1; + } else { + r.x1 = last.x < x ? last.x : x; + r.x2 = last.x > x ? last.x : x; + } + if (last.y == y) { + r.y1 = last.y; + r.y2 = last.y + 1; + } else { + r.y1 = last.y < y ? last.y : y; + r.y2 = last.y > y ? last.y : y; + } + DBG(("%s: (%d, %d) -> (%d, %d) clipping line (%d, %d), (%d, %d) against box (%d, %d), (%d, %d)\n", + __FUNCTION__, + last.x, last.y, x, y, + r.x1, r.y1, r.x2, r.y2, + box->x1, box->y1, box->x2, box->y2)); + if (box_intersect(&r, box)) { + r.x1 += dx; + r.x2 += dx; + r.y1 += dy; + r.y2 += dy; + DBG(("%s: blt (%d, %d), (%d, %d)\n", + __FUNCTION__, + r.x1, r.y1, r.x2, r.y2)); + fill.blt(sna, &fill, + r.x1, r.y1, + r.x2-r.x1, r.y2-r.y1); + if (damage) { + assert_pixmap_contains_box(pixmap, &r); + sna_damage_add_box(damage, &r); + } + } + } + } + + last.x = x; + last.y = y; + first = 0; + } + fill.done(sna, &fill); + return TRUE; +} + +static Bool +sna_poly_line_extents(DrawablePtr drawable, GCPtr gc, + int mode, int n, DDXPointPtr pt, + BoxPtr out) +{ + BoxRec box; + int extra = gc->lineWidth >> 1; + + if (n == 0) + return true; + + if (n > 1) { + if (gc->joinStyle == JoinMiter) + extra = 6 * gc->lineWidth; + else if (gc->capStyle == CapProjecting) + extra = gc->lineWidth; + } + + box.x2 = box.x1 = pt->x; + box.y2 = box.y1 = pt->y; + if (mode == 
CoordModePrevious) { + int x = box.x1; + int y = box.y1; + while (--n) { + pt++; + x += pt->x; + y += pt->y; + BOX_ADD_PT(box, x, y); + } + } else { + while (--n) { + pt++; + BOX_ADD_PT(box, pt->x, pt->y); + } + } + box.x2++; + box.y2++; + + if (extra) { + box.x1 -= extra; + box.x2 += extra; + box.y1 -= extra; + box.y2 += extra; + } + + TRIM_AND_TRANSLATE_BOX(box, drawable, gc); + *out = box; + return BOX_EMPTY(box); +} + +static void +sna_poly_line(DrawablePtr drawable, GCPtr gc, + int mode, int n, DDXPointPtr pt) +{ + struct sna *sna = to_sna_from_drawable(drawable); + BoxRec extents; + RegionRec region; + + DBG(("%s(mode=%d, n=%d, pt[0]=(%d, %d)\n", + __FUNCTION__, mode, n, pt[0].x, pt[0].y)); + + if (sna_poly_line_extents(drawable, gc, mode, n, pt, &extents)) + return; + + DBG(("%s: extents (%d, %d), (%d, %d)\n", __FUNCTION__, + extents.x1, extents.y1, extents.x2, extents.y2)); + + if (sna->kgem.wedged) + goto fallback; + + if (gc->fillStyle == FillSolid && + gc->lineStyle == LineSolid && + (gc->lineWidth == 0 || gc->lineWidth == 1) && + PM_IS_SOLID(drawable, gc->planemask) && + sna_poly_line_can_blt(mode, n, pt)) { + struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable); + + DBG(("%s: trying solid fill [%08lx]\n", + __FUNCTION__, gc->fgPixel)); + + if (sna_drawable_use_gpu_bo(drawable, &extents) && + sna_poly_line_blt(drawable, + priv->gpu_bo, + priv->gpu_only ? 
NULL : &priv->gpu_damage, + gc, mode, n, pt)) + return; + + if (sna_drawable_use_cpu_bo(drawable, &extents) && + sna_poly_line_blt(drawable, + priv->cpu_bo, + &priv->cpu_damage, + gc, mode, n, pt)) + return; + } + +fallback: + DBG(("%s: fallback\n", __FUNCTION__)); + RegionInit(®ion, &extents, 1); + if (gc->pCompositeClip) + RegionIntersect(®ion, ®ion, gc->pCompositeClip); + if (!RegionNotEmpty(®ion)) + return; + + sna_gc_move_to_cpu(gc); + sna_drawable_move_region_to_cpu(drawable, ®ion, true); + RegionUninit(®ion); + + fbPolyLine(drawable, gc, mode, n, pt); +} + +static Bool +sna_poly_segment_can_blt(int n, xSegment *seg) +{ + while (n--) { + if (seg->x1 != seg->x2 && seg->y1 != seg->y2) + return FALSE; + + seg++; + } + + return TRUE; +} + +static Bool +sna_poly_segment_blt(DrawablePtr drawable, + struct kgem_bo *bo, + struct sna_damage **damage, + GCPtr gc, int n, xSegment *seg) +{ + struct sna *sna = to_sna_from_drawable(drawable); + PixmapPtr pixmap = get_drawable_pixmap(drawable); + RegionPtr clip = fbGetCompositeClip(gc); + struct sna_fill_op fill; + int16_t dx, dy; + + DBG(("%s: alu=%d, fg=%08lx\n", __FUNCTION__, gc->alu, gc->fgPixel)); + + if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, gc->fgPixel)) + return FALSE; + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + while (n--) { + int x, y, width, height, nclip; + BoxPtr box; + + if (seg->x1 < seg->x2) { + x = seg->x1; + width = seg->x2; + } else { + x = seg->x2; + width = seg->x1; + } + width -= x - 1; + x += drawable->x; + + if (seg->y1 < seg->y2) { + y = seg->y1; + height = seg->y2; + } else { + y = seg->y2; + height = seg->y1; + } + height -= y - 1; + y += drawable->y; + + /* don't paint last pixel */ + if (gc->capStyle == CapNotLast) { + if (width == 1) + height--; + else + width--; + } + + DBG(("%s: [%d] (%d, %d)x(%d, %d) + (%d, %d)\n", __FUNCTION__, n, + x, y, width, height, dx, dy)); + for (nclip = REGION_NUM_RECTS(clip), box = REGION_RECTS(clip); nclip--; box++) { + BoxRec r = { x, y, 
x + width, y + height }; + if (box_intersect(&r, box)) { + r.x1 += dx; + r.x2 += dx; + r.y1 += dy; + r.y2 += dy; + fill.blt(sna, &fill, + r.x1, r.y1, + r.x2-r.x1, r.y2-r.y1); + if (damage) { + assert_pixmap_contains_box(pixmap, &r); + sna_damage_add_box(damage, &r); + } + } + } + + seg++; + } + fill.done(sna, &fill); + return TRUE; +} + +static Bool +sna_poly_segment_extents(DrawablePtr drawable, GCPtr gc, + int n, xSegment *seg, + BoxPtr out) +{ + BoxRec box; + int extra = gc->lineWidth; + + if (n == 0) + return true; + + if (gc->capStyle != CapProjecting) + extra >>= 1; + + if (seg->x2 > seg->x1) { + box.x1 = seg->x1; + box.x2 = seg->x2; + } else { + box.x2 = seg->x1; + box.x1 = seg->x2; + } + + if (seg->y2 > seg->y1) { + box.y1 = seg->y1; + box.y2 = seg->y2; + } else { + box.y2 = seg->y1; + box.y1 = seg->y2; + } + + while (--n) { + seg++; + if (seg->x2 > seg->x1) { + if (seg->x1 < box.x1) box.x1 = seg->x1; + if (seg->x2 > box.x2) box.x2 = seg->x2; + } else { + if (seg->x2 < box.x1) box.x1 = seg->x2; + if (seg->x1 > box.x2) box.x2 = seg->x1; + } + + if (seg->y2 > seg->y1) { + if (seg->y1 < box.y1) box.y1 = seg->y1; + if (seg->y2 > box.y2) box.y2 = seg->y2; + } else { + if (seg->y2 < box.y1) box.y1 = seg->y2; + if (seg->y1 > box.y2) box.y2 = seg->y1; + } + } + + box.x2++; + box.y2++; + + if (extra) { + box.x1 -= extra; + box.x2 += extra; + box.y1 -= extra; + box.y2 += extra; + } + + TRIM_AND_TRANSLATE_BOX(box, drawable, gc); + *out = box; + return BOX_EMPTY(box); +} + +static void +sna_poly_segment(DrawablePtr drawable, GCPtr gc, int n, xSegment *seg) +{ + struct sna *sna = to_sna_from_drawable(drawable); + BoxRec extents; + RegionRec region; + + DBG(("%s(n=%d, first=((%d, %d), (%d, %d))\n", __FUNCTION__, + n, seg->x1, seg->y1, seg->x2, seg->y2)); + + if (sna_poly_segment_extents(drawable, gc, n, seg, &extents)) + return; + + DBG(("%s: extents=(%d, %d), (%d, %d)\n", __FUNCTION__, + extents.x1, extents.y1, extents.x2, extents.y2)); + + if (sna->kgem.wedged) + goto 
fallback; + + if (gc->fillStyle == FillSolid && + gc->lineStyle == LineSolid && + gc->lineWidth == 0 && + PM_IS_SOLID(drawable, gc->planemask) && + sna_poly_segment_can_blt(n, seg)) { + struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable); + + DBG(("%s: trying blt solid fill [%08lx] paths\n", + __FUNCTION__, gc->fgPixel)); + + if (sna_drawable_use_gpu_bo(drawable, &extents) && + sna_poly_segment_blt(drawable, + priv->gpu_bo, + priv->gpu_only ? NULL : &priv->gpu_damage, + gc, n, seg)) + return; + + if (sna_drawable_use_cpu_bo(drawable, &extents) && + sna_poly_segment_blt(drawable, + priv->cpu_bo, + &priv->cpu_damage, + gc, n, seg)) + return; + } + +fallback: + DBG(("%s: fallback\n", __FUNCTION__)); + RegionInit(®ion, &extents, 1); + if (gc->pCompositeClip) + RegionIntersect(®ion, ®ion, gc->pCompositeClip); + if (!RegionNotEmpty(®ion)) + return; + + sna_gc_move_to_cpu(gc); + sna_drawable_move_region_to_cpu(drawable, ®ion, true); + RegionUninit(®ion); + + fbPolySegment(drawable, gc, n, seg); +} + +static Bool +sna_poly_arc_extents(DrawablePtr drawable, GCPtr gc, + int n, xArc *arc, + BoxPtr out) +{ + int extra = gc->lineWidth >> 1; + BoxRec box; + + if (n == 0) + return true; + + box.x1 = arc->x; + box.x2 = box.x1 + arc->width; + box.y1 = arc->y; + box.y2 = box.y1 + arc->height; + + while (--n) { + arc++; + if (box.x1 > arc->x) + box.x1 = arc->x; + if (box.x2 < arc->x + arc->width) + box.x2 = arc->x + arc->width; + if (box.y1 > arc->y) + box.y1 = arc->y; + if (box.y2 < arc->y + arc->height) + box.y2 = arc->y + arc->height; + } + + if (extra) { + box.x1 -= extra; + box.x2 += extra; + box.y1 -= extra; + box.y2 += extra; + } + + box.x2++; + box.y2++; + + TRIM_AND_TRANSLATE_BOX(box, drawable, gc); + *out = box; + return BOX_EMPTY(box); +} + +static void +sna_poly_arc(DrawablePtr drawable, GCPtr gc, int n, xArc *arc) +{ + BoxRec extents; + RegionRec region; + + if (sna_poly_arc_extents(drawable, gc, n, arc, &extents)) + return; + + DBG(("%s: extents=(%d, %d), (%d, 
%d)\n", __FUNCTION__, + extents.x1, extents.y1, extents.x2, extents.y2)); + + RegionInit(®ion, &extents, 1); + if (gc->pCompositeClip) + RegionIntersect(®ion, ®ion, gc->pCompositeClip); + if (!RegionNotEmpty(®ion)) + return; + + sna_gc_move_to_cpu(gc); + sna_drawable_move_region_to_cpu(drawable, ®ion, true); + RegionUninit(®ion); + + fbPolyArc(drawable, gc, n, arc); +} + +static Bool +sna_poly_fill_rect_blt(DrawablePtr drawable, + struct kgem_bo *bo, + struct sna_damage **damage, + GCPtr gc, int n, + xRectangle *rect) +{ + struct sna *sna = to_sna_from_drawable(drawable); + PixmapPtr pixmap = get_drawable_pixmap(drawable); + RegionPtr clip = fbGetCompositeClip(gc); + struct sna_fill_op fill; + uint32_t pixel = gc->fillStyle == FillSolid ? gc->fgPixel : gc->tile.pixel; + int16_t dx, dy; + + DBG(("%s x %d [(%d, %d)+(%d, %d)...]\n", + __FUNCTION__, n, rect->x, rect->y, rect->width, rect->height)); + + if (!sna_fill_init_blt(&fill, sna, pixmap, bo, gc->alu, pixel)) { + DBG(("%s: unsupported blt\n", __FUNCTION__)); + return FALSE; + } + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + if (REGION_NUM_RECTS(clip) == 1) { + BoxPtr box = REGION_RECTS(clip); + while (n--) { + BoxRec r; + + r.x1 = rect->x + drawable->x; + r.y1 = rect->y + drawable->y; + r.x2 = r.x1 + rect->width; + r.y2 = r.y1 + rect->height; + rect++; + + if (box_intersect(&r, box)) { + r.x1 += dx; + r.x2 += dx; + r.y1 += dy; + r.y2 += dy; + fill.blt(sna, &fill, + r.x1, r.y1, + r.x2-r.x1, r.y2-r.y1); + if (damage) { + assert_pixmap_contains_box(pixmap, &r); + sna_damage_add_box(damage, &r); + } + } + } + } else { + while (n--) { + RegionRec region; + BoxRec r,*box; + int nbox; + + r.x1 = rect->x + drawable->x; + r.y1 = rect->y + drawable->y; + r.x2 = r.x1 + rect->width; + r.y2 = r.y1 + rect->height; + rect++; + + RegionInit(®ion, &r, 1); + RegionIntersect(®ion, ®ion, clip); + + nbox = REGION_NUM_RECTS(®ion); + box = REGION_RECTS(®ion); + while (nbox--) { + box->x1 += dx; + box->x2 += dx; + box->y1 += dy; 
+ box->y2 += dy; + fill.blt(sna, &fill, + box->x1, box->y1, + box->x2-box->x1, box->y2-box->y1); + if (damage) { + assert_pixmap_contains_box(pixmap, box); + sna_damage_add_box(damage, box); + } + box++; + } + + RegionUninit(®ion); + } + } + fill.done(sna, &fill); + return TRUE; +} + +static uint32_t +get_pixel(PixmapPtr pixmap) +{ + DBG(("%s\n", __FUNCTION__)); + sna_pixmap_move_to_cpu(pixmap, false); + switch (pixmap->drawable.bitsPerPixel) { + case 32: return *(uint32_t *)pixmap->devPrivate.ptr; + case 16: return *(uint16_t *)pixmap->devPrivate.ptr; + default: return *(uint8_t *)pixmap->devPrivate.ptr; + } +} + +static Bool +sna_poly_fill_rect_tiled(DrawablePtr drawable, + struct kgem_bo *bo, + struct sna_damage **damage, + GCPtr gc, int n, + xRectangle *rect) +{ + struct sna *sna = to_sna_from_drawable(drawable); + PixmapPtr pixmap = get_drawable_pixmap(drawable); + PixmapPtr tile = gc->tile.pixmap; + RegionPtr clip = fbGetCompositeClip(gc); + DDXPointPtr origin = &gc->patOrg; + CARD32 alu = gc->alu; + int tile_width, tile_height; + int16_t dx, dy; + + DBG(("%s x %d [(%d, %d)+(%d, %d)...]\n", + __FUNCTION__, n, rect->x, rect->y, rect->width, rect->height)); + + tile_width = tile->drawable.width; + tile_height = tile->drawable.height; + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + + if (tile_width == 1 && tile_height == 1) { + struct sna_fill_op fill; + + if (!sna_fill_init_blt(&fill, sna, pixmap, bo, alu, get_pixel(tile))) { + DBG(("%s: unsupported blt\n", __FUNCTION__)); + return FALSE; + } + + if (REGION_NUM_RECTS(clip) == 1) { + BoxPtr box = REGION_RECTS(clip); + while (n--) { + BoxRec r; + + r.x1 = rect->x + drawable->x; + r.y1 = rect->y + drawable->y; + r.x2 = r.x1 + rect->width; + r.y2 = r.y1 + rect->height; + rect++; + + if (box_intersect(&r, box)) { + r.x1 += dx; + r.x2 += dx; + r.y1 += dy; + r.y2 += dy; + fill.blt(sna, &fill, + r.x1, r.y1, + r.x2-r.x1, r.y2-r.y1); + if (damage) { + assert_pixmap_contains_box(pixmap, &r); + 
sna_damage_add_box(damage, &r); + } + } + } + } else { + while (n--) { + RegionRec region; + BoxRec r,*box; + int nbox; + + r.x1 = rect->x + drawable->x; + r.y1 = rect->y + drawable->y; + r.x2 = r.x1 + rect->width; + r.y2 = r.y1 + rect->height; + rect++; + + RegionInit(®ion, &r, 1); + RegionIntersect(®ion, ®ion, clip); + + nbox = REGION_NUM_RECTS(®ion); + box = REGION_RECTS(®ion); + while (nbox--) { + box->x1 += dx; + box->x2 += dx; + box->y1 += dy; + box->y2 += dy; + fill.blt(sna, &fill, + box->x1, box->y1, + box->x2-box->x1, + box->y2-box->y1); + if (damage) { + assert_pixmap_contains_box(pixmap, box); + sna_damage_add_box(damage, box); + } + box++; + } + + RegionUninit(®ion); + } + } + fill.done(sna, &fill); + } else { + struct sna_copy_op copy; + + if (!sna_pixmap_move_to_gpu(tile)) + return FALSE; + + if (!sna_copy_init_blt(©, sna, + tile, sna_pixmap_get_bo(tile), + pixmap, bo, + alu)) { + DBG(("%s: unsupported blt\n", __FUNCTION__)); + return FALSE; + } + + if (REGION_NUM_RECTS(clip) == 1) { + const BoxPtr box = REGION_RECTS(clip); + while (n--) { + BoxRec r; + + r.x1 = rect->x + drawable->x; + r.y1 = rect->y + drawable->y; + r.x2 = r.x1 + rect->width; + r.y2 = r.y1 + rect->height; + rect++; + + if (box_intersect(&r, box)) { + int height = r.y2 - r.y1; + int dst_y = r.y1; + int tile_y = (r.y1 - drawable->y - origin->y) % tile_height; + while (height) { + int width = r.x2 - r.x1; + int dst_x = r.x1; + int tile_x = (r.x1 - drawable->x - origin->x) % tile_width; + int h = tile_height - tile_y; + if (h > height) + h = height; + height -= h; + + while (width > 0) { + int w = tile_width - tile_x; + if (w > width) + w = width; + width -= w; + + copy.blt(sna, ©, + tile_x, tile_y, + w, h, + dst_x + dx, dst_y + dy); + if (damage) { + BoxRec box; + box.x1 = dst_x + dx; + box.y1 = dst_y + dy; + box.x2 = box.x1 + w; + box.y2 = box.y1 + h; + assert_pixmap_contains_box(pixmap, &box); + sna_damage_add_box(damage, &box); + } + + dst_x += w; + tile_x = 0; + } + dst_y += h; + 
tile_y = 0; + } + } + } + } else { + while (n--) { + RegionRec region; + BoxRec r,*box; + int nbox; + + r.x1 = rect->x + drawable->x; + r.y1 = rect->y + drawable->y; + r.x2 = r.x1 + rect->width; + r.y2 = r.y1 + rect->height; + rect++; + + RegionInit(®ion, &r, 1); + RegionIntersect(®ion, ®ion, clip); + + nbox = REGION_NUM_RECTS(®ion); + box = REGION_RECTS(®ion); + while (nbox--) { + int height = box->y2 - box->y1; + int dst_y = box->y1; + int tile_y = (box->y1 - drawable->y - origin->y) % tile_height; + while (height) { + int width = box->x2 - box->x1; + int dst_x = box->x1; + int tile_x = (box->x1 - drawable->x - origin->x) % tile_width; + int h = tile_height - tile_y; + if (h > height) + h = height; + height -= h; + + while (width > 0) { + int w = tile_width - tile_x; + if (w > width) + w = width; + width -= w; + + copy.blt(sna, ©, + tile_x, tile_y, + w, h, + dst_x + dx, dst_y + dy); + if (damage) { + BoxRec box; + + box.x1 = dst_x + dx; + box.y1 = dst_y + dy; + box.x2 = box.x1 + w; + box.y2 = box.y1 + h; + + assert_pixmap_contains_box(pixmap, &box); + sna_damage_add_box(damage, &box); + } + + dst_x += w; + tile_x = 0; + } + dst_y += h; + tile_y = 0; + } + box++; + } + + RegionUninit(®ion); + } + } + copy.done(sna, ©); + } + return TRUE; +} + +static Bool +sna_poly_fill_rect_extents(DrawablePtr drawable, GCPtr gc, + int n, xRectangle *rect, + BoxPtr out) +{ + BoxRec box; + + if (n == 0) + return true; + + box.x1 = rect->x; + box.x2 = box.x1 + rect->width; + box.y1 = rect->y; + box.y2 = box.y1 + rect->height; + + while (--n) { + rect++; + BOX_ADD_RECT(box, rect->x, rect->y, rect->width, rect->height); + } + + TRIM_AND_TRANSLATE_BOX(box, drawable, gc); + *out = box; + return BOX_EMPTY(box); +} + +static void +sna_poly_fill_rect(DrawablePtr draw, GCPtr gc, int n, xRectangle *rect) +{ + struct sna *sna = to_sna_from_drawable(draw); + BoxRec extents; + RegionRec region; + + DBG(("%s(n=%d, PlaneMask: %lx (solid %d), solid fill: %d [style=%d, tileIsPixel=%d], alu=%d)\n", 
__FUNCTION__, + n, gc->planemask, !!PM_IS_SOLID(draw, gc->planemask), + (gc->fillStyle == FillSolid || + (gc->fillStyle == FillTiled && gc->tileIsPixel)), + gc->fillStyle, gc->tileIsPixel, + gc->alu)); + + if (sna_poly_fill_rect_extents(draw, gc, n, rect, &extents)) + return; + + if (sna->kgem.wedged) + goto fallback; + + if (!PM_IS_SOLID(draw, gc->planemask)) + goto fallback; + + if (gc->fillStyle == FillSolid || + (gc->fillStyle == FillTiled && gc->tileIsPixel)) { + struct sna_pixmap *priv = sna_pixmap_from_drawable(draw); + + DBG(("%s: solid fill [%08lx], testing for blt\n", + __FUNCTION__, + gc->fillStyle == FillSolid ? gc->fgPixel : gc->tile.pixel)); + + if (sna_drawable_use_gpu_bo(draw, &extents) && + sna_poly_fill_rect_blt(draw, + priv->gpu_bo, + priv->gpu_only ? NULL : &priv->gpu_damage, + gc, n, rect)) + return; + + if (sna_drawable_use_cpu_bo(draw, &extents) && + sna_poly_fill_rect_blt(draw, + priv->cpu_bo, + &priv->cpu_damage, + gc, n, rect)) + return; + } else if (gc->fillStyle == FillTiled) { + struct sna_pixmap *priv = sna_pixmap_from_drawable(draw); + + DBG(("%s: tiled fill, testing for blt\n", __FUNCTION__)); + + if (sna_drawable_use_gpu_bo(draw, &extents) && + sna_poly_fill_rect_tiled(draw, + priv->gpu_bo, + priv->gpu_only ? 
NULL : &priv->gpu_damage, + gc, n, rect)) + return; + + if (sna_drawable_use_cpu_bo(draw, &extents) && + sna_poly_fill_rect_tiled(draw, + priv->cpu_bo, + &priv->cpu_damage, + gc, n, rect)) + return; + } + +fallback: + DBG(("%s: fallback (%d, %d), (%d, %d)\n", __FUNCTION__, + extents.x1, extents.y1, extents.x2, extents.y2)); + RegionInit(®ion, &extents, 1); + if (gc->pCompositeClip) + RegionIntersect(®ion, ®ion, gc->pCompositeClip); + if (!RegionNotEmpty(®ion)) + return; + + sna_gc_move_to_cpu(gc); + sna_drawable_move_region_to_cpu(draw, ®ion, true); + RegionUninit(®ion); + + DBG(("%s: fallback - fbPolyFillRect\n", __FUNCTION__)); + fbPolyFillRect(draw, gc, n, rect); +} + +static void +sna_image_glyph(DrawablePtr drawable, GCPtr gc, + int x, int y, unsigned int n, + CharInfoPtr *info, pointer base) +{ + ExtentInfoRec extents; + BoxRec box; + RegionRec region; + + if (n == 0) + return; + + QueryGlyphExtents(gc->font, info, n, &extents); + if (extents.overallWidth >= 0) { + box.x1 = x; + box.x2 = x + extents.overallWidth; + } else { + box.x2 = x; + box.x1 = x + extents.overallWidth; + } + box.y1 = y - FONTASCENT(gc->font); + box.y2 = y + FONTDESCENT(gc->font); + TRIM_BOX(box, drawable); + TRANSLATE_BOX(box, drawable); + + DBG(("%s: extents(%d, %d), (%d, %d)\n", + __FUNCTION__, box.x1, box.y1, box.x2, box.y2)); + + RegionInit(®ion, &box, 1); + if (gc->pCompositeClip) + RegionIntersect(®ion, ®ion, gc->pCompositeClip); + if (!RegionNotEmpty(®ion)) + return; + + sna_gc_move_to_cpu(gc); + sna_drawable_move_region_to_cpu(drawable, ®ion, true); + RegionUninit(®ion); + + fbImageGlyphBlt(drawable, gc, x, y, n, info, base); +} + +static void +sna_poly_glyph(DrawablePtr drawable, GCPtr gc, + int x, int y, unsigned int n, + CharInfoPtr *info, pointer base) +{ + ExtentInfoRec extents; + BoxRec box; + RegionRec region; + + if (n == 0) + return; + + QueryGlyphExtents(gc->font, info, n, &extents); + box.x1 = x + extents.overallLeft; + box.y1 = y - extents.overallAscent; + box.x2 = x 
+ extents.overallRight; + box.y2 = y + extents.overallDescent; + + TRIM_BOX(box, drawable); + TRANSLATE_BOX(box, drawable); + + DBG(("%s: extents(%d, %d), (%d, %d)\n", + __FUNCTION__, box.x1, box.y1, box.x2, box.y2)); + + RegionInit(®ion, &box, 1); + if (gc->pCompositeClip) + RegionIntersect(®ion, ®ion, gc->pCompositeClip); + if (!RegionNotEmpty(®ion)) + return; + + sna_gc_move_to_cpu(gc); + sna_drawable_move_region_to_cpu(drawable, ®ion, true); + RegionUninit(®ion); + + fbPolyGlyphBlt(drawable, gc, x, y, n, info, base); +} + +static void +sna_push_pixels(GCPtr gc, PixmapPtr bitmap, DrawablePtr drawable, + int w, int h, + int x, int y) +{ + BoxRec box; + RegionRec region; + + if (w == 0 || h == 0) + return; + + DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); + + box.x1 = x; + box.y1 = y; + if (!gc->miTranslate) { + box.x1 += drawable->x; + box.y1 += drawable->y; + } + box.x2 = box.x1 + w; + box.y2 = box.y1 + h; + + CLIP_BOX(box, gc); + if (BOX_EMPTY(box)) + return; + + DBG(("%s: extents(%d, %d), (%d, %d)\n", + __FUNCTION__, box.x1, box.y1, box.x2, box.y2)); + + RegionInit(®ion, &box, 1); + if (gc->pCompositeClip) + RegionIntersect(®ion, ®ion, gc->pCompositeClip); + if (!RegionNotEmpty(®ion)) + return; + + sna_gc_move_to_cpu(gc); + sna_pixmap_move_to_cpu(bitmap, false); + sna_drawable_move_region_to_cpu(drawable, ®ion, true); + RegionUninit(®ion); + + fbPushPixels(gc, bitmap, drawable, w, h, x, y); +} + +static const GCOps sna_gc_ops = { + sna_fill_spans, + sna_set_spans, + sna_put_image, + sna_copy_area, + sna_copy_plane, + sna_poly_point, + sna_poly_line, + sna_poly_segment, + miPolyRectangle, + sna_poly_arc, + miFillPolygon, + sna_poly_fill_rect, + miPolyFillArc, + miPolyText8, + miPolyText16, + miImageText8, + miImageText16, + sna_image_glyph, + sna_poly_glyph, + sna_push_pixels, +}; + +static void sna_validate_pixmap(DrawablePtr draw, PixmapPtr pixmap) +{ + if (draw->bitsPerPixel == pixmap->drawable.bitsPerPixel && + FbEvenTile(pixmap->drawable.width 
* + pixmap->drawable.bitsPerPixel)) { + DBG(("%s: flushing pixmap\n", __FUNCTION__)); + sna_pixmap_move_to_cpu(pixmap, true); + } +} + +static void +sna_validate_gc(GCPtr gc, unsigned long changes, DrawablePtr drawable) +{ + DBG(("%s\n", __FUNCTION__)); + + if (changes & GCTile && !gc->tileIsPixel) { + DBG(("%s: flushing tile pixmap\n", __FUNCTION__)); + sna_validate_pixmap(drawable, gc->tile.pixmap); + } + + if (changes & GCStipple && gc->stipple) { + DBG(("%s: flushing stipple pixmap\n", __FUNCTION__)); + sna_pixmap_move_to_cpu(gc->stipple, true); + } + + fbValidateGC(gc, changes, drawable); +} + +static const GCFuncs sna_gc_funcs = { + sna_validate_gc, + miChangeGC, + miCopyGC, + miDestroyGC, + miChangeClip, + miDestroyClip, + miCopyClip +}; + +static int sna_create_gc(GCPtr gc) +{ + if (!fbCreateGC(gc)) + return FALSE; + + gc->funcs = (GCFuncs *)&sna_gc_funcs; + gc->ops = (GCOps *)&sna_gc_ops; + return TRUE; +} + +static void +sna_get_image(DrawablePtr drawable, + int x, int y, int w, int h, + unsigned int format, unsigned long mask, + char *dst) +{ + BoxRec extents; + RegionRec region; + + DBG(("%s (%d, %d, %d, %d)\n", __FUNCTION__, x, y, w, h)); + + extents.x1 = x + drawable->x; + extents.y1 = y + drawable->y; + extents.x2 = extents.x1 + w; + extents.y2 = extents.y1 + h; + RegionInit(®ion, &extents, 1); + + sna_drawable_move_region_to_cpu(drawable, ®ion, false); + fbGetImage(drawable, x, y, w, h, format, mask, dst); + + RegionUninit(®ion); +} + +static void +sna_get_spans(DrawablePtr drawable, int wMax, + DDXPointPtr pt, int *width, int n, char *start) +{ + BoxRec extents; + RegionRec region; + + if (sna_spans_extents(drawable, NULL, n, pt, width, &extents)) + return; + + RegionInit(®ion, &extents, 1); + sna_drawable_move_region_to_cpu(drawable, ®ion, false); + RegionUninit(®ion); + + fbGetSpans(drawable, wMax, pt, width, n, start); +} + +static void +sna_copy_window(WindowPtr win, DDXPointRec origin, RegionPtr src) +{ + struct sna *sna = 
to_sna_from_drawable(&win->drawable); + PixmapPtr pixmap = fbGetWindowPixmap(win); + RegionRec dst; + int dx, dy; + + DBG(("%s origin=(%d, %d)\n", __FUNCTION__, origin.x, origin.y)); + + if (sna->kgem.wedged) { + sna_pixmap_move_to_cpu(pixmap, true); + fbCopyWindow(win, origin, src); + return; + } + + dx = origin.x - win->drawable.x; + dy = origin.y - win->drawable.y; + RegionTranslate(src, -dx, -dy); + + RegionNull(&dst); + RegionIntersect(&dst, &win->borderClip, src); +#ifdef COMPOSITE + if (pixmap->screen_x || pixmap->screen_y) + RegionTranslate(&dst, -pixmap->screen_x, -pixmap->screen_y); +#endif + + miCopyRegion(&pixmap->drawable, &pixmap->drawable, + NULL, &dst, dx, dy, sna_copy_boxes, 0, NULL); + + RegionUninit(&dst); +} + +static Bool sna_change_window_attributes(WindowPtr win, unsigned long mask) +{ + DBG(("%s\n", __FUNCTION__)); + + /* Check if the fb layer wishes to modify the attached pixmaps, + * to fix up mismatches between the window and pixmap depths. + */ + if (mask & CWBackPixmap && win->backgroundState == BackgroundPixmap) { + DBG(("%s: flushing background pixmap\n", __FUNCTION__)); + sna_validate_pixmap(&win->drawable, win->background.pixmap); + } + + if (mask & CWBorderPixmap && win->borderIsPixel == FALSE) { + DBG(("%s: flushing border pixmap\n", __FUNCTION__)); + sna_validate_pixmap(&win->drawable, win->border.pixmap); + } + + return fbChangeWindowAttributes(win, mask); +} + +static void +sna_add_traps(PicturePtr picture, INT16 x, INT16 y, int n, xTrap *t) +{ + DBG(("%s (%d, %d) x %d\n", __FUNCTION__, x, y, n)); + + sna_drawable_move_to_cpu(picture->pDrawable, true); + + fbAddTraps(picture, x, y, n, t); +} + +static void +sna_accel_flush_callback(CallbackListPtr *list, + pointer user_data, pointer call_data) +{ + struct sna *sna = user_data; + + if (sna->kgem.flush == 0 && list_is_empty(&sna->dirty_pixmaps)) + return; + + DBG(("%s\n", __FUNCTION__)); + + /* flush any pending damage from shadow copies to tfp clients */ + while 
(!list_is_empty(&sna->dirty_pixmaps)) { + struct sna_pixmap *priv = list_first_entry(&sna->dirty_pixmaps, + struct sna_pixmap, + list); + sna_pixmap_move_to_gpu(priv->pixmap); + } + + kgem_submit(&sna->kgem); +} + +static void sna_deferred_free(struct sna *sna) +{ + struct sna_pixmap *priv, *next; + + list_for_each_entry_safe(priv, next, &sna->deferred_free, list) { + if (priv->cpu_bo->gpu) + continue; + + list_del(&priv->list); + kgem_bo_destroy(&sna->kgem, priv->cpu_bo); + fbDestroyPixmap(priv->pixmap); + free(priv); + } +} + +static uint64_t read_timer(int fd) +{ + uint64_t count = 0; + int ret = read(fd, &count, sizeof(count)); + return count; + (void)ret; +} + +static struct sna_pixmap *sna_accel_scanout(struct sna *sna) +{ + PixmapPtr front = sna->shadow ? sna->shadow : sna->front; + struct sna_pixmap *priv = sna_pixmap(front); + return priv && priv->gpu_bo ? priv : NULL; +} + +#if HAVE_SYS_TIMERFD_H +#include <sys/timerfd.h> +#include <errno.h> + +static void _sna_accel_disarm_timer(struct sna *sna, int id) +{ + struct itimerspec to; + + DBG(("%s[%d] (time=%ld)\n", __FUNCTION__, id, (long)GetTimeInMillis())); + + memset(&to, 0, sizeof(to)); + timerfd_settime(sna->timer[id], 0, &to, NULL); + sna->timer_active &= ~(1<<id); +} + +#define return_if_timer_active(id) do { \ + if (sna->timer_active & (1<<(id))) \ + return read_timer(sna->timer[id]) > 0; \ +} while (0) + +static Bool sna_accel_do_flush(struct sna *sna) +{ + struct itimerspec to; + struct sna_pixmap *priv; + + return_if_timer_active(FLUSH_TIMER); + + priv = sna_accel_scanout(sna); + if (priv == NULL) + return FALSE; + + if (priv->cpu_damage == NULL && priv->gpu_bo->rq == NULL) + return FALSE; + + if (sna->timer[FLUSH_TIMER] == -1) + return TRUE; + + DBG(("%s, time=%ld\n", __FUNCTION__, (long)GetTimeInMillis())); + + /* Initial redraw after 10ms. 
*/ + to.it_value.tv_sec = 0; + to.it_value.tv_nsec = 10 * 1000 * 1000; + + /* Then periodic updates at 50Hz.*/ + to.it_interval.tv_sec = 0; + to.it_interval.tv_nsec = 20 * 1000 * 1000; + timerfd_settime(sna->timer[FLUSH_TIMER], 0, &to, NULL); + + sna->timer_active |= 1 << FLUSH_TIMER; + return FALSE; +} + +static Bool sna_accel_do_expire(struct sna *sna) +{ + struct itimerspec to; + + return_if_timer_active(EXPIRE_TIMER); + + if (!kgem_needs_expire(&sna->kgem)) + return FALSE; + + if (sna->timer[EXPIRE_TIMER] == -1) + return TRUE; + + /* Initial expiration after 5s. */ + to.it_value.tv_sec = 5; + to.it_value.tv_nsec = 0; + + /* Then periodic update every 1s.*/ + to.it_interval.tv_sec = 1; + to.it_interval.tv_nsec = 0; + timerfd_settime(sna->timer[EXPIRE_TIMER], 0, &to, NULL); + + sna->timer_active |= 1 << EXPIRE_TIMER; + return FALSE; +} + +static void sna_accel_create_timers(struct sna *sna) +{ + int id; + + for (id = 0; id < NUM_TIMERS; id++) + sna->timer[id] = timerfd_create(CLOCK_MONOTONIC, TFD_NONBLOCK); +} +#else +static void sna_accel_create_timers(struct sna *sna) +{ + int id; + + for (id = 0; id < NUM_TIMERS; id++) + sna->timer[id] = -1; +} +static Bool sna_accel_do_flush(struct sna *sna) { return sna_accel_scanout(sna) != NULL; } +static Bool sna_accel_arm_expire(struct sna *sna) { return TRUE; } +static void _sna_accel_disarm_timer(struct sna *sna, int id) { } +#endif + +static void sna_accel_flush(struct sna *sna) +{ + struct sna_pixmap *priv = sna_accel_scanout(sna); + + DBG(("%s (time=%ld)\n", __FUNCTION__, (long)GetTimeInMillis())); + + sna_pixmap_move_to_gpu(priv->pixmap); + kgem_bo_flush(&sna->kgem, priv->gpu_bo); + + if (priv->gpu_bo->rq == NULL) + _sna_accel_disarm_timer(sna, FLUSH_TIMER); +} + +static void sna_accel_expire(struct sna *sna) +{ + DBG(("%s (time=%ld)\n", __FUNCTION__, (long)GetTimeInMillis())); + + if (!kgem_expire_cache(&sna->kgem)) + _sna_accel_disarm_timer(sna, EXPIRE_TIMER); +} + +static void sna_accel_install_timers(struct sna 
*sna) +{ + if (sna->timer[FLUSH_TIMER] != -1) + AddGeneralSocket(sna->timer[FLUSH_TIMER]); + + if (sna->timer[EXPIRE_TIMER] != -1) + AddGeneralSocket(sna->timer[EXPIRE_TIMER]); +} + +Bool sna_accel_pre_init(struct sna *sna) +{ + sna_accel_create_timers(sna); + return TRUE; +} + +Bool sna_accel_init(ScreenPtr screen, struct sna *sna) +{ + const char *backend; + + if (!dixRegisterPrivateKey(&sna_pixmap_index, PRIVATE_PIXMAP, 0)) + return FALSE; + + if (!AddCallback(&FlushCallback, sna_accel_flush_callback, sna)) + return FALSE; + + if (!sna_glyphs_init(screen)) + return FALSE; + + list_init(&sna->dirty_pixmaps); + list_init(&sna->deferred_free); + + sna_accel_install_timers(sna); + + screen->CreateGC = sna_create_gc; + screen->GetImage = sna_get_image; + screen->GetSpans = sna_get_spans; + screen->CopyWindow = sna_copy_window; + screen->ChangeWindowAttributes = sna_change_window_attributes; + screen->CreatePixmap = sna_create_pixmap; + screen->DestroyPixmap = sna_destroy_pixmap; + +#ifdef RENDER + { + PictureScreenPtr ps = GetPictureScreenIfSet(screen); + if (ps) { + ps->Composite = sna_composite; + ps->CompositeRects = sna_composite_rectangles; + ps->Glyphs = sna_glyphs; + ps->UnrealizeGlyph = sna_glyph_unrealize; + ps->AddTraps = sna_add_traps; + ps->Trapezoids = sna_composite_trapezoids; + } + } +#endif + + backend = "no"; + sna->have_render = false; + no_render_init(sna); + +#if !DEBUG_NO_RENDER + if (sna->chipset.info->gen >= 70) { + } else if (sna->chipset.info->gen >= 60) { + if ((sna->have_render = gen6_render_init(sna))) + backend = "Sandybridge"; + } else if (sna->chipset.info->gen >= 50) { + if ((sna->have_render = gen5_render_init(sna))) + backend = "Ironlake"; + } else if (sna->chipset.info->gen >= 40) { + if ((sna->have_render = gen4_render_init(sna))) + backend = "Broadwater"; + } else if (sna->chipset.info->gen >= 30) { + if ((sna->have_render = gen3_render_init(sna))) + backend = "gen3"; + } else if (sna->chipset.info->gen >= 20) { + if 
((sna->have_render = gen2_render_init(sna))) + backend = "gen2"; + } +#endif + DBG(("%s(backend=%s, have_render=%d)\n", + __FUNCTION__, backend, sna->have_render)); + + xf86DrvMsg(sna->scrn->scrnIndex, X_INFO, + "SNA initialized with %s backend\n", + backend); + + return TRUE; +} + +Bool sna_accel_create(struct sna *sna) +{ + if (!sna_glyphs_create(sna)) + return FALSE; + + if (!sna_gradients_create(sna)) + return FALSE; + + return TRUE; +} + +void sna_accel_close(struct sna *sna) +{ + sna_glyphs_close(sna); + sna_gradients_close(sna); + + DeleteCallback(&FlushCallback, sna_accel_flush_callback, sna); +} + +static void sna_accel_throttle(struct sna *sna) +{ + if (sna->flags & SNA_NO_THROTTLE) + return; + + if (list_is_empty(&sna->kgem.requests)) + return; + + DBG(("%s (time=%ld)\n", __FUNCTION__, (long)GetTimeInMillis())); + + kgem_throttle(&sna->kgem); +} + +void sna_accel_block_handler(struct sna *sna) +{ + if (sna_accel_do_flush(sna)) + sna_accel_flush(sna); + + if (sna_accel_do_expire(sna)) + sna_accel_expire(sna); + + sna_accel_throttle(sna); +} + +void sna_accel_wakeup_handler(struct sna *sna) +{ + _kgem_retire(&sna->kgem); + sna_deferred_free(sna); + + if (sna->kgem.need_purge) + kgem_expire_cache(&sna->kgem); +} + +void sna_accel_free(struct sna *sna) +{ + int id; + + for (id = 0; id < NUM_TIMERS; id++) + if (sna->timer[id] != -1) { + close(sna->timer[id]); + sna->timer[id] = -1; + } +} diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c new file mode 100644 index 00000000..cdcfc3b0 --- /dev/null +++ b/src/sna/sna_blt.c @@ -0,0 +1,1339 @@ +/* + * Based on code from intel_uxa.c and i830_xaa.c + * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. 
+ * Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org> + * Copyright (c) 2009-2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_render.h" +#include "sna_reg.h" + +#include <mipict.h> +#include <fbpict.h> +#include <xaarop.h> + +#if DEBUG_BLT +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +#define NO_BLT_COMPOSITE 0 +#define NO_BLT_COPY 0 +#define NO_BLT_COPY_BOXES 0 +#define NO_BLT_FILL 0 +#define NO_BLT_FILL_BOXES 0 + +static const uint8_t copy_ROP[] = { + ROP_0, /* GXclear */ + ROP_DSa, /* GXand */ + ROP_SDna, /* GXandReverse */ + ROP_S, /* GXcopy */ + ROP_DSna, /* GXandInverted */ + ROP_D, /* GXnoop */ + ROP_DSx, /* GXxor */ + ROP_DSo, /* GXor */ + ROP_DSon, /* GXnor */ + ROP_DSxn, /* GXequiv */ + ROP_Dn, /* GXinvert */ + ROP_SDno, /* GXorReverse */ + ROP_Sn, /* GXcopyInverted */ + ROP_DSno, /* GXorInverted */ + ROP_DSan, /* GXnand */ + ROP_1 /* GXset */ +}; + +static const uint8_t fill_ROP[] = { + ROP_0, + ROP_DPa, + ROP_PDna, + ROP_P, + ROP_DPna, + ROP_D, + ROP_DPx, + ROP_DPo, + ROP_DPon, + ROP_PDxn, + ROP_Dn, + ROP_PDno, + ROP_Pn, + ROP_DPno, + ROP_DPan, + ROP_1 +}; + +static void nop_done(struct sna *sna, const struct sna_composite_op *op) +{ +} + +static void blt_done(struct sna *sna, const struct sna_composite_op *op) +{ + struct kgem *kgem = &sna->kgem; + + DBG(("%s: nbatch=%d\n", __FUNCTION__, kgem->nbatch)); + _kgem_set_mode(kgem, KGEM_BLT); +} + +static bool sna_blt_fill_init(struct sna *sna, + struct sna_blt_state *blt, + struct kgem_bo *bo, + int bpp, + uint8_t alu, + uint32_t pixel) +{ + struct kgem *kgem = &sna->kgem; + int pitch; + + + blt->bo[0] = bo; + + blt->cmd = XY_COLOR_BLT_CMD; + if (bpp == 32) + blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + + pitch = bo->pitch; + if (kgem->gen >= 40 && bo->tiling) { + blt->cmd |= BLT_DST_TILED; + pitch >>= 2; + } + if (pitch > MAXSHORT) + return FALSE; + + blt->overwrites = alu == GXcopy || alu == GXclear; + blt->br13 = (fill_ROP[alu] << 16) | pitch; 
+ switch (bpp) { + default: assert(0); + case 32: blt->br13 |= 1 << 25; /* RGB8888 */ + case 16: blt->br13 |= 1 << 24; /* RGB565 */ + case 8: break; + } + + blt->pixel = pixel; + + kgem_set_mode(kgem, KGEM_BLT); + if (!kgem_check_bo_fenced(kgem, bo, NULL)) + _kgem_submit(kgem); + + return TRUE; +} + +static void sna_blt_fill_one(struct sna *sna, + const struct sna_blt_state *blt, + int x, int y, + int width, int height) +{ + struct kgem *kgem = &sna->kgem; + uint32_t *b; + + DBG(("%s: (%d, %d) x (%d, %d): %08x\n", + __FUNCTION__, x, y, width, height, blt->pixel)); + + assert(x >= 0); + assert(y >= 0); + assert((y+height) * blt->bo[0]->pitch <= blt->bo[0]->size); + + /* All too frequently one blt completely overwrites the previous */ + if (kgem->nbatch >= 6 && + blt->overwrites && + kgem->batch[kgem->nbatch-6] == blt->cmd && + kgem->batch[kgem->nbatch-4] == (y << 16 | x) && + kgem->batch[kgem->nbatch-3] == ((y+height) << 16 | (x+width)) && + kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[0]->handle) { + DBG(("%s: replacing last fill\n", __FUNCTION__)); + kgem->batch[kgem->nbatch-5] = blt->br13; + kgem->batch[kgem->nbatch-1] = blt->pixel; + return; + } + + if (!kgem_check_batch(kgem, 6) || + kgem->nreloc + 1 > KGEM_RELOC_SIZE(kgem)) + _kgem_submit(kgem); + + b = kgem->batch + kgem->nbatch; + b[0] = blt->cmd; + b[1] = blt->br13; + b[2] = (y << 16) | x; + b[3] = ((y + height) << 16) | (x + width); + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, + blt->bo[0], + I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER | KGEM_RELOC_FENCED, + 0); + b[5] = blt->pixel; + kgem->nbatch += 6; +} + +static Bool sna_blt_copy_init(struct sna *sna, + struct sna_blt_state *blt, + struct kgem_bo *src, + struct kgem_bo *dst, + int bpp, + uint8_t alu) +{ + struct kgem *kgem = &sna->kgem; + + blt->bo[0] = src; + blt->bo[1] = dst; + + blt->cmd = XY_SRC_COPY_BLT_CMD; + if (bpp == 32) + blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + + blt->pitch[0] = src->pitch; + if (kgem->gen >= 40 && 
src->tiling) { + blt->cmd |= BLT_SRC_TILED; + blt->pitch[0] >>= 2; + } + if (blt->pitch[0] > MAXSHORT) + return FALSE; + + blt->pitch[1] = dst->pitch; + if (kgem->gen >= 40 && dst->tiling) { + blt->cmd |= BLT_DST_TILED; + blt->pitch[1] >>= 2; + } + if (blt->pitch[1] > MAXSHORT) + return FALSE; + + blt->overwrites = alu == GXcopy || alu == GXclear; + blt->br13 = (copy_ROP[alu] << 16) | blt->pitch[1]; + switch (bpp) { + default: assert(0); + case 32: blt->br13 |= 1 << 25; /* RGB8888 */ + case 16: blt->br13 |= 1 << 24; /* RGB565 */ + case 8: break; + } + + kgem_set_mode(kgem, KGEM_BLT); + if (!kgem_check_bo_fenced(kgem, src, dst, NULL)) + _kgem_submit(kgem); + + return TRUE; +} + +static void sna_blt_copy_one(struct sna *sna, + const struct sna_blt_state *blt, + int src_x, int src_y, + int width, int height, + int dst_x, int dst_y) +{ + struct kgem *kgem = &sna->kgem; + uint32_t *b; + + DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n", + __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height)); + + assert(src_x >= 0); + assert(src_y >= 0); + assert((src_y + height) * blt->bo[0]->pitch <= blt->bo[0]->size); + assert(dst_x >= 0); + assert(dst_y >= 0); + assert((dst_y + height) * blt->bo[1]->pitch <= blt->bo[1]->size); + assert(width > 0); + assert(height > 0); + + /* Compare against a previous fill */ + if (kgem->nbatch >= 6 && + blt->overwrites && + kgem->batch[kgem->nbatch-6] == ((blt->cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT_CMD) && + kgem->batch[kgem->nbatch-4] == (dst_y << 16 | dst_x) && + kgem->batch[kgem->nbatch-3] == ((dst_y+height) << 16 | (dst_x+width)) && + kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->handle) { + DBG(("%s: replacing last fill\n", __FUNCTION__)); + b = kgem->batch + kgem->nbatch - 6; + b[0] = blt->cmd; + b[1] = blt->br13; + b[5] = (src_y << 16) | src_x; + b[6] = blt->pitch[0]; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6, + blt->bo[0], + I915_GEM_DOMAIN_RENDER << 16 | KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8 - 6; + return; 
+ } + + if (kgem->nbatch + 8 > KGEM_BATCH_SIZE(kgem) || + kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem)) + _kgem_submit(kgem); + + b = kgem->batch + kgem->nbatch; + b[0] = blt->cmd; + b[1] = blt->br13; + b[2] = (dst_y << 16) | dst_x; + b[3] = ((dst_y + height) << 16) | (dst_x + width); + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, + blt->bo[1], + I915_GEM_DOMAIN_RENDER << 16 | I915_GEM_DOMAIN_RENDER | KGEM_RELOC_FENCED, + 0); + b[5] = (src_y << 16) | src_x; + b[6] = blt->pitch[0]; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, + blt->bo[0], + I915_GEM_DOMAIN_RENDER << 16 | KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8; +} + +static Bool +get_rgba_from_pixel(uint32_t pixel, + uint16_t *red, + uint16_t *green, + uint16_t *blue, + uint16_t *alpha, + uint32_t format) +{ + int rbits, bbits, gbits, abits; + int rshift, bshift, gshift, ashift; + + rbits = PICT_FORMAT_R(format); + gbits = PICT_FORMAT_G(format); + bbits = PICT_FORMAT_B(format); + abits = PICT_FORMAT_A(format); + + if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) { + rshift = gshift = bshift = ashift = 0; + } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) { + bshift = 0; + gshift = bbits; + rshift = gshift + gbits; + ashift = rshift + rbits; + } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) { + rshift = 0; + gshift = rbits; + bshift = gshift + gbits; + ashift = bshift + bbits; + } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) { + ashift = 0; + rshift = abits; + if (abits == 0) + rshift = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits); + gshift = rshift + rbits; + bshift = gshift + gbits; + } else { + return FALSE; + } + + if (rbits) { + *red = ((pixel >> rshift) & ((1 << rbits) - 1)) << (16 - rbits); + while (rbits < 16) { + *red |= *red >> rbits; + rbits <<= 1; + } + } else + *red = 0; + + if (gbits) { + *green = ((pixel >> gshift) & ((1 << gbits) - 1)) << (16 - gbits); + while (gbits < 16) { + *green |= *green >> gbits; + gbits <<= 1; + } + } else + *green = 0; + + if (bbits) { + *blue = 
((pixel >> bshift) & ((1 << bbits) - 1)) << (16 - bbits); + while (bbits < 16) { + *blue |= *blue >> bbits; + bbits <<= 1; + } + } else + *blue = 0; + + if (abits) { + *alpha = ((pixel >> ashift) & ((1 << abits) - 1)) << (16 - abits); + while (abits < 16) { + *alpha |= *alpha >> abits; + abits <<= 1; + } + } else + *alpha = 0xffff; + + return TRUE; +} + +Bool +sna_get_pixel_from_rgba(uint32_t * pixel, + uint16_t red, + uint16_t green, + uint16_t blue, + uint16_t alpha, + uint32_t format) +{ + int rbits, bbits, gbits, abits; + int rshift, bshift, gshift, ashift; + + rbits = PICT_FORMAT_R(format); + gbits = PICT_FORMAT_G(format); + bbits = PICT_FORMAT_B(format); + abits = PICT_FORMAT_A(format); + if (abits == 0) + abits = PICT_FORMAT_BPP(format) - (rbits+gbits+bbits); + + if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A) { + *pixel = alpha >> (16 - abits); + return TRUE; + } + + if (!PICT_FORMAT_COLOR(format)) + return FALSE; + + if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB) { + bshift = 0; + gshift = bbits; + rshift = gshift + gbits; + ashift = rshift + rbits; + } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) { + rshift = 0; + gshift = rbits; + bshift = gshift + gbits; + ashift = bshift + bbits; + } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) { + ashift = 0; + rshift = abits; + gshift = rshift + rbits; + bshift = gshift + gbits; + } else + return FALSE; + + *pixel = 0; + *pixel |= (blue >> (16 - bbits)) << bshift; + *pixel |= (green >> (16 - gbits)) << gshift; + *pixel |= (red >> (16 - rbits)) << rshift; + *pixel |= (alpha >> (16 - abits)) << ashift; + + return TRUE; +} + +static uint32_t +color_convert(uint32_t pixel, + uint32_t src_format, + uint32_t dst_format) +{ + DBG(("%s: src=%08x [%08x]\n", __FUNCTION__, pixel, src_format)); + + if (src_format != dst_format) { + uint16_t red, green, blue, alpha; + + if (!get_rgba_from_pixel(pixel, + &red, &green, &blue, &alpha, + src_format)) + return 0; + + if (!sna_get_pixel_from_rgba(&pixel, + red, green, 
blue, alpha, + dst_format)) + return 0; + } + + DBG(("%s: dst=%08x [%08x]\n", __FUNCTION__, pixel, dst_format)); + return pixel; +} + +uint32_t +sna_rgba_for_color(uint32_t color, int depth) +{ + return color_convert(color, sna_format_for_depth(depth), PICT_a8r8g8b8); +} + +static uint32_t +get_pixel(PicturePtr picture) +{ + PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable); + + DBG(("%s: %p\n", __FUNCTION__, pixmap)); + + sna_pixmap_move_to_cpu(pixmap, false); + switch (pixmap->drawable.bitsPerPixel) { + case 32: return *(uint32_t *)pixmap->devPrivate.ptr; + case 16: return *(uint16_t *)pixmap->devPrivate.ptr; + default: return *(uint8_t *)pixmap->devPrivate.ptr; + } +} + +static uint32_t +get_solid_color(PicturePtr picture, uint32_t format) +{ + if (picture->pSourcePict) { + PictSolidFill *fill = (PictSolidFill *)picture->pSourcePict; + return color_convert(fill->color, PICT_a8r8g8b8, format); + } else + return color_convert(get_pixel(picture), picture->format, format); +} + +static Bool +is_solid(PicturePtr picture) +{ + if (picture->pSourcePict) { + if (picture->pSourcePict->type == SourcePictTypeSolidFill) + return TRUE; + } + + if (picture->pDrawable) { + if (picture->pDrawable->width == 1 && + picture->pDrawable->height == 1 && + picture->repeat) + return TRUE; + } + + return FALSE; +} + +Bool +sna_picture_is_solid(PicturePtr picture, uint32_t *color) +{ + if (!is_solid(picture)) + return FALSE; + + *color = get_solid_color(picture, PICT_a8r8g8b8); + return TRUE; +} + +static Bool +pixel_is_opaque(uint32_t pixel, uint32_t format) +{ + int abits; + + abits = PICT_FORMAT_A(format); + if (!abits) + return TRUE; + + if (PICT_FORMAT_TYPE(format) == PICT_TYPE_A || + PICT_FORMAT_TYPE(format) == PICT_TYPE_BGRA) { + return (pixel & ((1 << abits) - 1)) == ((1 << abits) - 1); + } else if (PICT_FORMAT_TYPE(format) == PICT_TYPE_ARGB || + PICT_FORMAT_TYPE(format) == PICT_TYPE_ABGR) { + int ashift = PICT_FORMAT_BPP(format) - abits; + return (pixel >> ashift) == 
((1 << abits) - 1); + } else + return FALSE; +} + +static Bool +is_opaque_solid(PicturePtr picture) +{ + if (picture->pSourcePict) { + PictSolidFill *fill = (PictSolidFill *) picture->pSourcePict; + return (fill->color >> 24) == 0xff; + } else + return pixel_is_opaque(get_pixel(picture), picture->format); +} + +fastcall +static void blt_fill_composite(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + int x1, x2, y1, y2; + + x1 = r->dst.x + op->dst.x; + y1 = r->dst.y + op->dst.y; + x2 = x1 + r->width; + y2 = y1 + r->height; + + if (x1 < 0) + x1 = 0; + if (y1 < 0) + y1 = 0; + + if (x2 > op->dst.width) + x2 = op->dst.width; + if (y2 > op->dst.height) + y2 = op->dst.height; + + if (x2 <= x1 || y2 <= y1) + return; + + sna_blt_fill_one(sna, &op->u.blt, x1, y1, x2-x1, y2-y1); +} + +static void blt_fill_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int n) +{ + do { + sna_blt_fill_one(sna, &op->u.blt, + box->x1 + op->dst.x, box->y1 + op->dst.y, + box->x2 - box->x1, box->y2 - box->y1); + box++; + } while (--n); +} + +static Bool +prepare_blt_clear(struct sna *sna, + struct sna_composite_op *op) +{ + DBG(("%s\n", __FUNCTION__)); + + op->blt = blt_fill_composite; + op->boxes = blt_fill_composite_boxes; + op->done = blt_done; + + return sna_blt_fill_init(sna, &op->u.blt, + op->dst.bo, + op->dst.pixmap->drawable.bitsPerPixel, + GXclear, 0); +} + +static bool +prepare_blt_fill(struct sna *sna, + struct sna_composite_op *op, + PicturePtr source) +{ + DBG(("%s\n", __FUNCTION__)); + + op->blt = blt_fill_composite; + op->boxes = blt_fill_composite_boxes; + op->done = blt_done; + + return sna_blt_fill_init(sna, &op->u.blt, op->dst.bo, + op->dst.pixmap->drawable.bitsPerPixel, + GXcopy, + get_solid_color(source, op->dst.format)); +} + +fastcall static void +blt_copy_composite(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + int x1, x2, 
y1, y2; + int src_x, src_y; + + DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", + __FUNCTION__, + r->src.x, r->src.y, + r->dst.x, r->dst.y, + r->width, r->height)); + + /* XXX higher layer should have clipped? */ + + x1 = r->dst.x + op->dst.x; + y1 = r->dst.y + op->dst.y; + x2 = x1 + r->width; + y2 = y1 + r->height; + + src_x = r->src.x - x1; + src_y = r->src.y - y1; + + /* clip against dst */ + if (x1 < 0) + x1 = 0; + if (y1 < 0) + y1 = 0; + + if (x2 > op->dst.width) + x2 = op->dst.width; + + if (y2 > op->dst.height) + y2 = op->dst.height; + + DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2)); + + if (x2 <= x1 || y2 <= y1) + return; + + sna_blt_copy_one(sna, &op->u.blt, + x1 + src_x, y1 + src_y, + x2 - x1, y2 - y1, + x1, y1); +} + +static void blt_copy_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + do { + DBG(("%s: box (%d, %d), (%d, %d)\n", + __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); + sna_blt_copy_one(sna, &op->u.blt, + box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy, + box->x2 - box->x1, box->y2 - box->y1, + box->x1 + op->dst.x, box->y1 + op->dst.y); + box++; + } while(--nbox); +} + +static Bool +prepare_blt_copy(struct sna *sna, + struct sna_composite_op *op) +{ + PixmapPtr src = op->u.blt.src_pixmap; + struct sna_pixmap *priv = sna_pixmap(src); + + if (priv->gpu_bo->tiling == I915_TILING_Y) + return FALSE; + + if (!kgem_check_bo_fenced(&sna->kgem, priv->gpu_bo, NULL)) + _kgem_submit(&sna->kgem); + + DBG(("%s\n", __FUNCTION__)); + + op->blt = blt_copy_composite; + op->boxes = blt_copy_composite_boxes; + op->done = blt_done; + + return sna_blt_copy_init(sna, &op->u.blt, + priv->gpu_bo, + op->dst.bo, + src->drawable.bitsPerPixel, + GXcopy); +} + +static void blt_vmap_done(struct sna *sna, const struct sna_composite_op *op) +{ + struct kgem_bo *bo = (struct kgem_bo *)op->u.blt.src_pixmap; + + blt_done(sna, op); + if (bo) { + 
struct kgem *kgem = &sna->kgem; + kgem_bo_sync(kgem, bo, true); + kgem_bo_destroy(kgem, bo); + } +} + +fastcall static void +blt_put_composite(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + PixmapPtr dst = op->dst.pixmap; + PixmapPtr src = op->u.blt.src_pixmap; + struct sna_pixmap *dst_priv = sna_pixmap(dst); + int pitch = src->devKind; + char *data = src->devPrivate.ptr; + int bpp = src->drawable.bitsPerPixel; + + int16_t dst_x = r->dst.x + op->dst.x; + int16_t dst_y = r->dst.y + op->dst.y; + int16_t src_x = r->src.x + op->u.blt.sx; + int16_t src_y = r->src.y + op->u.blt.sy; + + if (!dst_priv->pinned && + dst_x <= 0 && dst_y <= 0 && + dst_x + r->width >= op->dst.width && + dst_y + r->height >= op->dst.height) { + data += (src_x - dst_x) * bpp / 8; + data += (src_y - dst_y) * pitch; + + dst_priv->gpu_bo = + sna_replace(sna, dst_priv->gpu_bo, + r->width, r->height, bpp, + data, pitch); + } else { + BoxRec box; + + box.x1 = dst_x; + box.y1 = dst_y; + box.x2 = dst_x + r->width; + box.y2 = dst_y + r->height; + + sna_write_boxes(sna, + dst_priv->gpu_bo, 0, 0, + data, pitch, bpp, src_x, src_y, + &box, 1); + } +} + +static void blt_put_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int n) +{ + PixmapPtr src = op->u.blt.src_pixmap; + struct sna_pixmap *dst_priv = sna_pixmap(op->dst.pixmap); + + DBG(("%s: src=(%d, %d), dst=(%d, %d) x %d\n", __FUNCTION__, + op->u.blt.sx, op->u.blt.sy, + op->dst.x, op->dst.y, n)); + + if (n == 1 && !dst_priv->pinned && + box->x2 - box->x1 == op->dst.width && + box->y2 - box->y1 == op->dst.height) { + int pitch = src->devKind; + int bpp = src->drawable.bitsPerPixel / 8; + char *data = src->devPrivate.ptr; + + data += (box->y1 + op->u.blt.sy) * pitch; + data += (box->x1 + op->u.blt.sx) * bpp; + + dst_priv->gpu_bo = + sna_replace(sna, + op->dst.bo, + op->dst.width, + op->dst.height, + src->drawable.bitsPerPixel, + data, pitch); + } else { + 
sna_write_boxes(sna, + op->dst.bo, op->dst.x, op->dst.y, + src->devPrivate.ptr, + src->devKind, + src->drawable.bitsPerPixel, + op->u.blt.sx, op->u.blt.sy, + box, n); + } +} + +static Bool +prepare_blt_put(struct sna *sna, + struct sna_composite_op *op) +{ + PixmapPtr src = op->u.blt.src_pixmap; + struct sna_pixmap *priv = sna_pixmap(src); + struct kgem_bo *src_bo = NULL; + struct kgem_bo *free_bo = NULL; + + DBG(("%s\n", __FUNCTION__)); + + if (priv) { + if (!priv->gpu_only) { + src_bo = priv->cpu_bo; + if (!src_bo) { + src_bo = kgem_create_map(&sna->kgem, + src->devPrivate.ptr, + pixmap_size(src), + 1); + priv->cpu_bo = src_bo; + } + } + } else { + src_bo = kgem_create_map(&sna->kgem, + src->devPrivate.ptr, + pixmap_size(src), + 0); + free_bo = src_bo; + } + if (src_bo) { + op->blt = blt_copy_composite; + op->boxes = blt_copy_composite_boxes; + + op->u.blt.src_pixmap = (void *)free_bo; + op->done = blt_vmap_done; + + src_bo->pitch = src->devKind; + if (!sna_blt_copy_init(sna, &op->u.blt, + src_bo, op->dst.bo, + op->dst.pixmap->drawable.bitsPerPixel, + GXcopy)) + return FALSE; + } else { + op->blt = blt_put_composite; + op->boxes = blt_put_composite_boxes; + op->done = nop_done; + } + + return TRUE; +} + +static Bool +has_gpu_area(PixmapPtr pixmap, int x, int y, int w, int h) +{ + struct sna_pixmap *priv = sna_pixmap(pixmap); + BoxRec area; + + if (!priv) + return FALSE; + if (!priv->gpu_bo) + return FALSE; + + if (priv->cpu_damage == NULL) + return TRUE; + + area.x1 = x; + area.y1 = y; + area.x2 = x + w; + area.y2 = y + h; + return sna_damage_contains_box(priv->cpu_damage, + &area) == PIXMAN_REGION_OUT; +} + +static Bool +has_cpu_area(PixmapPtr pixmap, int x, int y, int w, int h) +{ + struct sna_pixmap *priv = sna_pixmap(pixmap); + BoxRec area; + + if (!priv) + return TRUE; + if (!priv->gpu_bo) + return TRUE; + if (priv->gpu_only) + return FALSE; + + if (priv->gpu_damage == NULL) + return TRUE; + + area.x1 = x; + area.y1 = y; + area.x2 = x + w; + area.y2 = y + h; 
+ return sna_damage_contains_box(priv->gpu_damage, + &area) == PIXMAN_REGION_OUT; +} + +Bool +sna_blt_composite(struct sna *sna, + uint32_t op, + PicturePtr src, + PicturePtr dst, + int16_t x, int16_t y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp) +{ + struct sna_blt_state *blt = &tmp->u.blt; + PictFormat src_format = src->format; + struct sna_pixmap *priv; + int16_t tx, ty; + Bool ret; + +#if DEBUG_NO_BLT || NO_BLT_COMPOSITE + return FALSE; +#endif + + DBG(("%s (%d, %d), (%d, %d), %dx%d\n", + __FUNCTION__, x, y, dst_x, dst_y, width, height)); + + switch (dst->pDrawable->bitsPerPixel) { + case 8: + case 16: + case 32: + break; + default: + DBG(("%s: unhandled bpp: %d\n", __FUNCTION__, + dst->pDrawable->bitsPerPixel)); + return FALSE; + } + + tmp->dst.pixmap = get_drawable_pixmap(dst->pDrawable); + priv = sna_pixmap_move_to_gpu(tmp->dst.pixmap); + if (priv == NULL || priv->gpu_bo->tiling == I915_TILING_Y) { + DBG(("%s: dst not on the gpu or using Y-tiling\n", + __FUNCTION__)); + return FALSE; + } + + tmp->dst.format = dst->format; + tmp->dst.width = tmp->dst.pixmap->drawable.width; + tmp->dst.height = tmp->dst.pixmap->drawable.height; + get_drawable_deltas(dst->pDrawable, tmp->dst.pixmap, + &tmp->dst.x, &tmp->dst.y); + tmp->dst.bo = priv->gpu_bo; + if (!priv->gpu_only) + tmp->damage = &priv->gpu_damage; + + if (!kgem_check_bo_fenced(&sna->kgem, priv->gpu_bo, NULL)) + _kgem_submit(&sna->kgem); + + if (op == PictOpClear) + return prepare_blt_clear(sna, tmp); + + if (is_solid(src)) { + if (op == PictOpOver && is_opaque_solid(src)) + op = PictOpSrc; + + if (op != PictOpSrc) { + DBG(("%s: unsuported op [%d] for blitting\n", + __FUNCTION__, op)); + return FALSE; + } + + return prepare_blt_fill(sna, tmp, src); + } + + if (!src->pDrawable) { + DBG(("%s: unsuported procedural source\n", + __FUNCTION__)); + return FALSE; + } + + if (src->pDrawable->bitsPerPixel != dst->pDrawable->bitsPerPixel) { + DBG(("%s: mismatching bpp 
src=%d, dst=%d\n", + __FUNCTION__, + src->pDrawable->bitsPerPixel, + dst->pDrawable->bitsPerPixel)); + return FALSE; + } + + if (op == PictOpOver && PICT_FORMAT_A(src_format) == 0) + op = PictOpSrc; + + if (op != PictOpSrc) { + DBG(("%s: unsuported op [%d] for blitting\n", + __FUNCTION__, op)); + return FALSE; + } + + if (src->filter == PictFilterConvolution) { + DBG(("%s: convolutions filters not handled\n", + __FUNCTION__)); + return FALSE; + } + + if (!(dst->format == src_format || + dst->format == PICT_FORMAT(PICT_FORMAT_BPP(src_format), + PICT_FORMAT_TYPE(src_format), + 0, + PICT_FORMAT_R(src_format), + PICT_FORMAT_G(src_format), + PICT_FORMAT_B(src_format)))) { + DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n", + __FUNCTION__, (unsigned)src_format, dst->format)); + return FALSE; + } + + if (!sna_transform_is_integer_translation(src->transform, &tx, &ty)) { + DBG(("%s: source transform is not an integer translation\n", + __FUNCTION__)); + return FALSE; + } + x += tx; + y += ty; + + /* XXX tiling? 
*/ + if (x < 0 || y < 0 || + x + width > src->pDrawable->width || + y + height > src->pDrawable->height) { + DBG(("%s: source extends outside of valid area\n", + __FUNCTION__)); + return FALSE; + } + + blt->src_pixmap = get_drawable_pixmap(src->pDrawable); + get_drawable_deltas(src->pDrawable, blt->src_pixmap, &tx, &ty); + x += tx + src->pDrawable->x; + y += ty + src->pDrawable->y; + assert(x >= 0); + assert(y >= 0); + assert(x + width <= blt->src_pixmap->drawable.width); + assert(y + height <= blt->src_pixmap->drawable.height); + + tmp->u.blt.sx = x - dst_x; + tmp->u.blt.sy = y - dst_y; + DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d)\n", + __FUNCTION__, + tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy)); + + if (has_gpu_area(blt->src_pixmap, x, y, width, height)) + ret = prepare_blt_copy(sna, tmp); + else if (has_cpu_area(blt->src_pixmap, x, y, width, height)) + ret = prepare_blt_put(sna, tmp); + else if (sna_pixmap_move_to_gpu(blt->src_pixmap)) + ret = prepare_blt_copy(sna, tmp); + else + ret = prepare_blt_put(sna, tmp); + + return ret; +} + +static void sna_blt_fill_op_blt(struct sna *sna, + const struct sna_fill_op *op, + int16_t x, int16_t y, + int16_t width, int16_t height) +{ + sna_blt_fill_one(sna, &op->base.u.blt, x, y, width, height); +} + +static void sna_blt_fill_op_done(struct sna *sna, + const struct sna_fill_op *fill) +{ + blt_done(sna, &fill->base); +} + +bool sna_blt_fill(struct sna *sna, uint8_t alu, + struct kgem_bo *bo, int bpp, + uint32_t pixel, + struct sna_fill_op *fill) +{ +#if DEBUG_NO_BLT || NO_BLT_FILL + return FALSE; +#endif + + DBG(("%s(alu=%d, pixel=%x, bpp=%d)\n", __FUNCTION__, alu, pixel, bpp)); + + if (bo->tiling == I915_TILING_Y) { + DBG(("%s: rejected due to incompatible Y-tiling\n", + __FUNCTION__)); + return FALSE; + } + + if (!sna_blt_fill_init(sna, &fill->base.u.blt, + bo, bpp, alu, pixel)) + return FALSE; + + fill->blt = sna_blt_fill_op_blt; + fill->done = sna_blt_fill_op_done; + return TRUE; +} + +static 
void sna_blt_copy_op_blt(struct sna *sna, + const struct sna_copy_op *op, + int16_t src_x, int16_t src_y, + int16_t width, int16_t height, + int16_t dst_x, int16_t dst_y) +{ + sna_blt_copy_one(sna, &op->base.u.blt, + src_x, src_y, + width, height, + dst_x, dst_y); +} + +static void sna_blt_copy_op_done(struct sna *sna, + const struct sna_copy_op *op) +{ + blt_done(sna, &op->base); +} + +bool sna_blt_copy(struct sna *sna, uint8_t alu, + struct kgem_bo *src, + struct kgem_bo *dst, + int bpp, + struct sna_copy_op *op) +{ +#if DEBUG_NO_BLT || NO_BLT_COPY + return FALSE; +#endif + + if (src->tiling == I915_TILING_Y) + return FALSE; + + if (dst->tiling == I915_TILING_Y) + return FALSE; + + if (!sna_blt_copy_init(sna, &op->base.u.blt, + src, dst, + bpp, alu)) + return FALSE; + + op->blt = sna_blt_copy_op_blt; + op->done = sna_blt_copy_op_done; + return TRUE; +} + +Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *bo, int bpp, + uint32_t color, + const BoxRec *box, int nbox) +{ + struct kgem *kgem = &sna->kgem; + int br13, cmd; + +#if DEBUG_NO_BLT || NO_BLT_FILL_BOXES + return FALSE; +#endif + + DBG(("%s (%d, %08x, %d) x %d\n", + __FUNCTION__, bpp, color, alu, nbox)); + + if (bo->tiling == I915_TILING_Y) + return FALSE; + + cmd = XY_COLOR_BLT_CMD; + if (bpp == 32) + cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + + br13 = bo->pitch; + if (kgem->gen >= 40 && bo->tiling) { + cmd |= BLT_DST_TILED; + br13 >>= 2; + } + if (br13 > MAXSHORT) + return FALSE; + + br13 |= fill_ROP[alu] << 16; + switch (bpp) { + default: assert(0); + case 32: br13 |= 1 << 25; /* RGB8888 */ + case 16: br13 |= 1 << 24; /* RGB565 */ + case 8: break; + } + + kgem_set_mode(kgem, KGEM_BLT); + if (!kgem_check_batch(kgem, 6) || + !kgem_check_bo_fenced(kgem, bo, NULL) || + kgem->nreloc + 1 > KGEM_RELOC_SIZE(kgem)) + _kgem_submit(kgem); + + do { + int nbox_this_time; + + nbox_this_time = nbox; + if (6*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = 
(kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 6; + if (nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = KGEM_RELOC_SIZE(kgem) - kgem->nreloc; + assert(nbox_this_time); + nbox -= nbox_this_time; + + do { + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG(("%s: box=((%d, %d), (%d, %d))\n", __FUNCTION__, + box->x1, box->y1, box->x2, box->y2)); + + assert(box->x1 >= 0); + assert(box->y1 >= 0); + + b[0] = cmd; + b[1] = br13; + b[2] = box->y1 << 16 | box->x1; + b[3] = box->y2 << 16 | box->x2; + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, + bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = color; + kgem->nbatch += 6; + box++; + } while (--nbox_this_time); + + if (nbox) + _kgem_submit(kgem); + } while (nbox); + + _kgem_set_mode(kgem, KGEM_BLT); + return TRUE; +} + +Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + int bpp, const BoxRec *box, int nbox) +{ + struct kgem *kgem = &sna->kgem; + int src_pitch, br13, cmd; + +#if DEBUG_NO_BLT || NO_BLT_COPY_BOXES + return FALSE; +#endif + + DBG(("%s src=(%d, %d) -> (%d, %d) x %d, tiling=(%d, %d), pitch=(%d, %d)\n", + __FUNCTION__, src_dx, src_dy, dst_dx, dst_dy, nbox, + src_bo->tiling, dst_bo->tiling, + src_bo->pitch, dst_bo->pitch)); + + if (src_bo->tiling == I915_TILING_Y) + return FALSE; + + if (dst_bo->tiling == I915_TILING_Y) + return FALSE; + + cmd = XY_SRC_COPY_BLT_CMD; + if (bpp == 32) + cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + + src_pitch = src_bo->pitch; + if (kgem->gen >= 40 && src_bo->tiling) { + cmd |= BLT_SRC_TILED; + src_pitch >>= 2; + } + if (src_pitch > MAXSHORT) + return FALSE; + + br13 = dst_bo->pitch; + if (kgem->gen >= 40 && dst_bo->tiling) { + cmd |= BLT_DST_TILED; + br13 >>= 2; + } + if (br13 > MAXSHORT) + return FALSE; + + br13 |= copy_ROP[alu] << 16; + switch (bpp) { + default: assert(0); + case 32: 
br13 |= 1 << 25; /* RGB8888 */ + case 16: br13 |= 1 << 24; /* RGB565 */ + case 8: break; + } + + kgem_set_mode(kgem, KGEM_BLT); + if (!kgem_check_batch(kgem, 8) || + !kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL) || + kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem)) + _kgem_submit(kgem); + + do { + int nbox_this_time; + + nbox_this_time = nbox; + if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + do { + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG((" %s: box=(%d, %d)x(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, box->y2 - box->y1)); + + assert(box->x1 + src_dx >= 0); + assert(box->y1 + src_dy >= 0); + + assert(box->x1 + dst_dx >= 0); + assert(box->y1 + dst_dy >= 0); + + b[0] = cmd; + b[1] = br13; + b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx); + b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx); + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, + dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx); + b[6] = src_pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, + src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8; + box++; + } while (--nbox_this_time); + + if (nbox) + _kgem_submit(kgem); + } while (nbox); + + _kgem_set_mode(kgem, KGEM_BLT); + return TRUE; +} diff --git a/src/sna/sna_composite.c b/src/sna/sna_composite.c new file mode 100644 index 00000000..27b1ff3c --- /dev/null +++ b/src/sna/sna_composite.c @@ -0,0 +1,722 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the 
"Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_render.h" +#include "sna_render_inline.h" + +#include <mipict.h> +#include <fbpict.h> + +#if DEBUG_COMPOSITE +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +static void dst_move_area_to_cpu(PicturePtr picture, + uint8_t op, + int x, int y, + int width, int height) +{ + RegionRec area; + BoxRec box; + + DBG(("%s: (%d, %d), (%d %d)\n", __FUNCTION__, x, y, width, height)); + + box.x1 = x; + box.y1 = y; + box.x2 = x + width; + box.y2 = y + height; + RegionInit(&area, &box, 1); + if (picture->pCompositeClip) + RegionIntersect(&area, &area, picture->pCompositeClip); + sna_drawable_move_region_to_cpu(picture->pDrawable, &area, true); + RegionUninit(&area); +} + +#define BOUND(v) (INT16) ((v) < MINSHORT ? MINSHORT : (v) > MAXSHORT ? 
MAXSHORT : (v)) + +static inline pixman_bool_t +clip_to_dst(pixman_region16_t *region, + pixman_region16_t *clip, + int dx, + int dy) +{ + DBG(("%s: region: %dx[(%d, %d), (%d, %d)], clip: %dx[(%d, %d), (%d, %d)]\n", + __FUNCTION__, + pixman_region_n_rects(region), + region->extents.x1, region->extents.y1, + region->extents.x2, region->extents.y2, + pixman_region_n_rects(clip), + clip->extents.x1, clip->extents.y1, + clip->extents.x2, clip->extents.y2)); + + if (pixman_region_n_rects(region) == 1 && + pixman_region_n_rects(clip) == 1) { + pixman_box16_t *r = pixman_region_rectangles(region, NULL); + pixman_box16_t *c = pixman_region_rectangles(clip, NULL); + int v; + + if (r->x1 < (v = c->x1 + dx)) + r->x1 = BOUND(v); + if (r->x2 > (v = c->x2 + dx)) + r->x2 = BOUND(v); + if (r->y1 < (v = c->y1 + dy)) + r->y1 = BOUND(v); + if (r->y2 > (v = c->y2 + dy)) + r->y2 = BOUND(v); + + if (r->x1 >= r->x2 || r->y1 >= r->y2) + pixman_region_init(region); + } else if (!pixman_region_not_empty(clip)) { + return FALSE; + } else { + if (dx || dy) + pixman_region_translate(region, -dx, -dy); + if (!pixman_region_intersect(region, region, clip)) + return FALSE; + if (dx || dy) + pixman_region_translate(region, dx, dy); + } + return pixman_region_not_empty(region); +} + +static inline Bool +clip_to_src(RegionPtr region, PicturePtr p, int dx, int dy) +{ + Bool result; + + if (p->clientClipType == CT_NONE) + return TRUE; + + pixman_region_translate(p->clientClip, + p->clipOrigin.x + dx, + p->clipOrigin.y + dy); + + result = RegionIntersect(region, region, p->clientClip); + + pixman_region_translate(p->clientClip, + -(p->clipOrigin.x + dx), + -(p->clipOrigin.y + dy)); + + return result && pixman_region_not_empty(region); +} + +Bool +sna_compute_composite_region(RegionPtr region, + PicturePtr src, PicturePtr mask, PicturePtr dst, + INT16 src_x, INT16 src_y, + INT16 mask_x, INT16 mask_y, + INT16 dst_x, INT16 dst_y, + CARD16 width, CARD16 height) +{ + int v; + + DBG(("%s: dst=(%d, %d)x(%d, 
%d)\n", + __FUNCTION__, + dst_x, dst_y, + width, height)); + + region->extents.x1 = dst_x < 0 ? 0 : dst_x; + v = dst_x + width; + if (v > dst->pDrawable->width) + v = dst->pDrawable->width; + region->extents.x2 = v; + + region->extents.y1 = dst_y < 0 ? 0 : dst_y; + v = dst_y + height; + if (v > dst->pDrawable->height) + v = dst->pDrawable->height; + region->extents.y2 = v; + + region->data = 0; + + DBG(("%s: initial clip against dst->pDrawable: (%d, %d), (%d, %d)\n", + __FUNCTION__, + region->extents.x1, region->extents.y1, + region->extents.x2, region->extents.y2)); + + if (region->extents.x1 >= region->extents.x2 || + region->extents.y1 >= region->extents.y2) + return FALSE; + + region->extents.x1 += dst->pDrawable->x; + region->extents.x2 += dst->pDrawable->x; + region->extents.y1 += dst->pDrawable->y; + region->extents.y2 += dst->pDrawable->y; + + dst_x += dst->pDrawable->x; + dst_y += dst->pDrawable->y; + + /* clip against dst */ + if (!clip_to_dst(region, dst->pCompositeClip, 0, 0)) + return FALSE; + + DBG(("%s: clip against dst->pCompositeClip: (%d, %d), (%d, %d)\n", + __FUNCTION__, + region->extents.x1, region->extents.y1, + region->extents.x2, region->extents.y2)); + + if (dst->alphaMap) { + if (!clip_to_dst(region, dst->alphaMap->pCompositeClip, + -dst->alphaOrigin.x, + -dst->alphaOrigin.y)) { + pixman_region_fini (region); + return FALSE; + } + } + + /* clip against src */ + if (src->pDrawable) { + src_x += src->pDrawable->x; + src_y += src->pDrawable->y; + } + if (!clip_to_src(region, src, dst_x - src_x, dst_y - src_y)) { + pixman_region_fini (region); + return FALSE; + } + DBG(("%s: clip against src: (%d, %d), (%d, %d)\n", + __FUNCTION__, + region->extents.x1, region->extents.y1, + region->extents.x2, region->extents.y2)); + + if (src->alphaMap) { + if (!clip_to_src(region, src->alphaMap, + dst_x - (src_x - src->alphaOrigin.x), + dst_y - (src_y - src->alphaOrigin.y))) { + pixman_region_fini(region); + return FALSE; + } + } + + /* clip against mask */ + 
if (mask) { + if (mask->pDrawable) { + mask_x += mask->pDrawable->x; + mask_y += mask->pDrawable->y; + } + if (!clip_to_src(region, mask, dst_x - mask_x, dst_y - mask_y)) { + pixman_region_fini(region); + return FALSE; + } + if (mask->alphaMap) { + if (!clip_to_src(region, mask->alphaMap, + dst_x - (mask_x - mask->alphaOrigin.x), + dst_y - (mask_y - mask->alphaOrigin.y))) { + pixman_region_fini(region); + return FALSE; + } + } + + DBG(("%s: clip against mask: (%d, %d), (%d, %d)\n", + __FUNCTION__, + region->extents.x1, region->extents.y1, + region->extents.x2, region->extents.y2)); + } + + return pixman_region_not_empty(region); +} + +static void +trim_extents(BoxPtr extents, const PicturePtr p, int dx, int dy) +{ + const BoxPtr box = REGION_EXTENTS(NULL, p->pCompositeClip); + + DBG(("%s: trim((%d, %d), (%d, %d)) against ((%d, %d), (%d, %d)) + (%d, %d)\n", + __FUNCTION__, + extents->x1, extents->y1, extents->x2, extents->y2, + box->x1, box->y1, box->x2, box->y2, + dx, dy)); + + if (extents->x1 < box->x1 + dx) + extents->x1 = box->x1 + dx; + if (extents->x2 > box->x2 + dx) + extents->x2 = box->x2 + dx; + + if (extents->y1 < box->y1 + dy) + extents->y1 = box->y1 + dy; + if (extents->y2 > box->y2 + dy) + extents->y2 = box->y2 + dy; +} + +static void +_trim_source_extents(BoxPtr extents, const PicturePtr p, int dx, int dy) +{ + if (p->clientClipType != CT_NONE) + trim_extents(extents, p, dx, dy); +} + +static void +trim_source_extents(BoxPtr extents, const PicturePtr p, int dx, int dy) +{ + if (p->pDrawable) { + dx += p->pDrawable->x; + dy += p->pDrawable->y; + } + _trim_source_extents(extents, p, dx, dy); + if (p->alphaMap) + _trim_source_extents(extents, p->alphaMap, + dx - p->alphaOrigin.x, + dy - p->alphaOrigin.y); + + DBG(("%s: -> (%d, %d), (%d, %d)\n", + __FUNCTION__, + extents->x1, extents->y1, + extents->x2, extents->y2)); +} + +Bool +sna_compute_composite_extents(BoxPtr extents, + PicturePtr src, PicturePtr mask, PicturePtr dst, + INT16 src_x, INT16 src_y, + 
INT16 mask_x, INT16 mask_y, + INT16 dst_x, INT16 dst_y, + CARD16 width, CARD16 height) +{ + int v; + + DBG(("%s: dst=(%d, %d)x(%d, %d)\n", + __FUNCTION__, + dst_x, dst_y, + width, height)); + + extents->x1 = dst_x < 0 ? 0 : dst_x; + v = dst_x + width; + if (v > dst->pDrawable->width) + v = dst->pDrawable->width; + extents->x2 = v; + + extents->y1 = dst_y < 0 ? 0 : dst_y; + v = dst_y + height; + if (v > dst->pDrawable->height) + v = dst->pDrawable->height; + + DBG(("%s: initial clip against dst->pDrawable: (%d, %d), (%d, %d)\n", + __FUNCTION__, + extents->x1, extents->y1, + extents->x2, extents->y2)); + + if (extents->x1 >= extents->x2 || + extents->y1 >= extents->y2) + return FALSE; + + extents->x1 += dst->pDrawable->x; + extents->x2 += dst->pDrawable->x; + extents->y1 += dst->pDrawable->y; + extents->y2 += dst->pDrawable->y; + + dst_x += dst->pDrawable->x; + dst_y += dst->pDrawable->y; + + /* clip against dst */ + trim_extents(extents, dst, 0, 0); + if (dst->alphaMap) + trim_extents(extents, dst->alphaMap, + -dst->alphaOrigin.x, + -dst->alphaOrigin.y); + + DBG(("%s: clip against dst: (%d, %d), (%d, %d)\n", + __FUNCTION__, + extents->x1, extents->y1, + extents->x2, extents->y2)); + + trim_source_extents(extents, src, dst_x - src_x, dst_y - src_y); + if (mask) + trim_source_extents(extents, mask, + dst_x - mask_x, dst_y - mask_y); + + return extents->x1 < extents->x2 && extents->y1 < extents->y2; +} + +#if DEBUG_COMPOSITE +static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function) +{ + if (box->x1 < 0 || box->y1 < 0 || + box->x2 > pixmap->drawable.width || + box->y2 > pixmap->drawable.height) + { + ErrorF("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, + pixmap->drawable.width, + pixmap->drawable.height); + assert(0); + } +} +#define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__) +#else +#define 
assert_pixmap_contains_box(p, b) +#endif + +static void apply_damage(struct sna_composite_op *op, RegionPtr region) +{ + DBG(("%s: damage=%p, region=%d\n", + __FUNCTION__, op->damage, REGION_NUM_RECTS(region))); + + if (op->damage == NULL) + return; + + RegionTranslate(region, op->dst.x, op->dst.y); + + assert_pixmap_contains_box(op->dst.pixmap, RegionExtents(region)); + sna_damage_add(op->damage, region); +} + +void +sna_composite(CARD8 op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + INT16 src_x, INT16 src_y, + INT16 mask_x, INT16 mask_y, + INT16 dst_x, INT16 dst_y, + CARD16 width, CARD16 height) +{ + struct sna *sna = to_sna_from_drawable(dst->pDrawable); + struct sna_composite_op tmp; + RegionRec region; + + DBG(("%s(%d src=(%d, %d), mask=(%d, %d), dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", + __FUNCTION__, op, + src_x, src_y, + mask_x, mask_y, + dst_x, dst_y, dst->pDrawable->x, dst->pDrawable->y, + width, height)); + + if (sna->kgem.wedged) { + DBG(("%s: fallback -- wedged\n", __FUNCTION__)); + goto fallback; + } + + if (dst->alphaMap || src->alphaMap || (mask && mask->alphaMap)) { + DBG(("%s: fallback due to unhandled alpha-map\n", __FUNCTION__)); + goto fallback; + } + + if (too_small(sna, dst->pDrawable) && + !picture_is_gpu(src) && !picture_is_gpu(mask)) { + DBG(("%s: fallback due to too small\n", __FUNCTION__)); + goto fallback; + } + + if (!sna_compute_composite_region(®ion, + src, mask, dst, + src_x, src_y, + mask_x, mask_y, + dst_x, dst_y, + width, height)) + return; + + DBG(("%s: composite region extents: (%d, %d), (%d, %d) + (%d, %d)\n", + __FUNCTION__, + region.extents.x1, region.extents.y1, + region.extents.x2, region.extents.y2, + get_drawable_dx(dst->pDrawable), + get_drawable_dy(dst->pDrawable))); + + memset(&tmp, 0, sizeof(tmp)); + if (!sna->render.composite(sna, + op, src, mask, dst, + src_x, src_y, + mask_x, mask_y, + dst_x + dst->pDrawable->x, + dst_y + dst->pDrawable->y, + width, height, + &tmp)) { + DBG(("%s: fallback due 
unhandled composite op\n", __FUNCTION__)); + goto fallback; + } + + tmp.boxes(sna, &tmp, + REGION_RECTS(®ion), + REGION_NUM_RECTS(®ion)); + apply_damage(&tmp, ®ion); + tmp.done(sna, &tmp); + + REGION_UNINIT(NULL, ®ion); + return; + +fallback: + DBG(("%s -- fallback dst=(%d, %d)+(%d, %d), size=(%d, %d)\n", + __FUNCTION__, + dst_x, dst_y, + dst->pDrawable->x, dst->pDrawable->y, + width, height)); + + dst_move_area_to_cpu(dst, op, + dst_x + dst->pDrawable->x, + dst_y + dst->pDrawable->y, + width, height); + if (src->pDrawable) + sna_drawable_move_to_cpu(src->pDrawable, false); + if (mask && mask->pDrawable) + sna_drawable_move_to_cpu(mask->pDrawable, false); + + DBG(("%s: fallback -- fbCompposite\n", __FUNCTION__)); + fbComposite(op, src, mask, dst, + src_x, src_y, + mask_x, mask_y, + dst_x, dst_y, + width, height); +} + +static Bool +_pixman_region_init_clipped_rectangles(pixman_region16_t *region, + int num_rects, xRectangle *rects, + int tx, int ty, + int maxx, int maxy) +{ + pixman_box16_t stack_boxes[64], *boxes = stack_boxes; + pixman_bool_t ret; + int i, j; + + if (num_rects > ARRAY_SIZE(stack_boxes)) { + boxes = malloc(sizeof(pixman_box16_t) * num_rects); + if (boxes == NULL) + return FALSE; + } + + for (i = j = 0; i < num_rects; i++) { + boxes[j].x1 = rects[i].x + tx; + if (boxes[j].x1 < 0) + boxes[j].x1 = 0; + + boxes[j].y1 = rects[i].y + ty; + if (boxes[j].y1 < 0) + boxes[j].y1 = 0; + + boxes[j].x2 = rects[i].x + rects[i].width; + if (boxes[j].x2 > maxx) + boxes[j].x2 = maxx; + boxes[j].x2 += tx; + + boxes[j].y2 = rects[i].y + rects[i].height; + if (boxes[j].y2 > maxy) + boxes[j].y2 = maxy; + boxes[j].y2 += ty; + + if (boxes[j].x2 > boxes[j].x1 && boxes[j].y2 > boxes[j].y1) + j++; + } + + ret = TRUE; + if (j) + ret = pixman_region_init_rects(region, boxes, j); + else + pixman_region_init(region); + + if (boxes != stack_boxes) + free(boxes); + + DBG(("%s: nrects=%d, region=(%d, %d), (%d, %d) x %d\n", + __FUNCTION__, num_rects, + region->extents.x1, 
region->extents.y1, + region->extents.x2, region->extents.y2, + pixman_region_n_rects(region))); + return ret; +} + +void +sna_composite_rectangles(CARD8 op, + PicturePtr dst, + xRenderColor *color, + int num_rects, + xRectangle *rects) +{ + struct sna *sna = to_sna_from_drawable(dst->pDrawable); + PixmapPtr pixmap; + struct sna_pixmap *priv; + pixman_region16_t region; + pixman_box16_t *boxes; + int16_t dst_x, dst_y; + int num_boxes; + int error; + + DBG(("%s(op=%d, %08x x %d [(%d, %d)x(%d, %d) ...])\n", + __FUNCTION__, op, + (color->alpha >> 8 << 24) | + (color->red >> 8 << 16) | + (color->green >> 8 << 8) | + (color->blue >> 8 << 0), + num_rects, + rects[0].x, rects[0].y, rects[0].width, rects[0].height)); + + if (!num_rects) + return; + + if (!pixman_region_not_empty(dst->pCompositeClip)) { + DBG(("%s: empty clip, skipping\n", __FUNCTION__)); + return; + } + + if (!_pixman_region_init_clipped_rectangles(®ion, + num_rects, rects, + dst->pDrawable->x, dst->pDrawable->y, + dst->pDrawable->width, dst->pDrawable->height)) + { + DBG(("%s: allocation failed for region\n", __FUNCTION__)); + return; + } + + DBG(("%s: drawable extents (%d, %d),(%d, %d) x %d\n", + __FUNCTION__, + RegionExtents(®ion)->x1, RegionExtents(®ion)->y1, + RegionExtents(®ion)->x2, RegionExtents(®ion)->y2, + RegionNumRects(®ion))); + + if (!pixman_region_intersect(®ion, ®ion, dst->pCompositeClip) || + !pixman_region_not_empty(®ion)) { + DBG(("%s: zero-intersection between rectangles and clip\n", + __FUNCTION__)); + pixman_region_fini(®ion); + return; + } + + DBG(("%s: clipped extents (%d, %d),(%d, %d) x %d\n", + __FUNCTION__, + RegionExtents(®ion)->x1, RegionExtents(®ion)->y1, + RegionExtents(®ion)->x2, RegionExtents(®ion)->y2, + RegionNumRects(®ion))); + + pixmap = get_drawable_pixmap(dst->pDrawable); + get_drawable_deltas(dst->pDrawable, pixmap, &dst_x, &dst_y); + pixman_region_translate(®ion, dst_x, dst_y); + + DBG(("%s: pixmap +(%d, %d) extents (%d, %d),(%d, %d)\n", + __FUNCTION__, dst_x, 
dst_y, + RegionExtents(®ion)->x1, RegionExtents(®ion)->y1, + RegionExtents(®ion)->x2, RegionExtents(®ion)->y2)); + + if (sna->kgem.wedged) + goto fallback; + + if (dst->alphaMap) { + DBG(("%s: fallback, dst has an alpha-map\n", __FUNCTION__)); + goto fallback; + } + + boxes = pixman_region_rectangles(®ion, &num_boxes); + + if (op == PictOpClear) { + color->red = color->green = color->blue = color->alpha = 0; + } else if (color->alpha >= 0xff00 && op == PictOpOver) { + color->alpha = 0xffff; + op = PictOpSrc; + } + + if (too_small(sna, dst->pDrawable)) { + DBG(("%s: fallback, dst is too small\n", __FUNCTION__)); + goto fallback; + } + + priv = sna_pixmap_move_to_gpu(pixmap); + if (priv == NULL) { + DBG(("%s: fallback due to no GPU bo\n", __FUNCTION__)); + goto fallback; + } + + if (!sna->render.fill_boxes(sna, op, dst->format, color, + pixmap, priv->gpu_bo, + boxes, num_boxes)) { + DBG(("%s: fallback - acceleration failed\n", __FUNCTION__)); + goto fallback; + } + + if (!priv->gpu_only) { + assert_pixmap_contains_box(pixmap, RegionExtents(®ion)); + sna_damage_add(&priv->gpu_damage, ®ion); + } + + goto done; + +fallback: + DBG(("%s: fallback\n", __FUNCTION__)); + sna_drawable_move_region_to_cpu(&pixmap->drawable, ®ion, true); + + if (op == PictOpSrc || op == PictOpClear) { + PixmapPtr pixmap = get_drawable_pixmap(dst->pDrawable); + int nbox = REGION_NUM_RECTS(®ion); + BoxPtr box = REGION_RECTS(®ion); + uint32_t pixel; + + if (sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + dst->format)) { + do { + DBG(("%s: fallback fill: (%d, %d)x(%d, %d) %08x\n", + __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1, + pixel)); + + pixman_fill(pixmap->devPrivate.ptr, + pixmap->devKind/sizeof(uint32_t), + pixmap->drawable.bitsPerPixel, + box->x1, box->y1, + box->x2 - box->x1, + box->y2 - box->y1, + pixel); + box++; + } while (--nbox); + } + } else { + PicturePtr src; + + src = CreateSolidPicture(0, color, &error); 
+ if (src) { + do { + fbComposite(op, src, NULL, dst, + 0, 0, + 0, 0, + rects->x, rects->y, + rects->width, rects->height); + rects++; + } while (--num_rects); + FreePicture(src, 0); + } + } + +done: + /* XXX xserver-1.8: CompositeRects is not tracked by Damage, so we must + * manually append the damaged regions ourselves. + */ + DamageRegionAppend(&pixmap->drawable, ®ion); + DamageRegionProcessPending(&pixmap->drawable); + + pixman_region_fini(®ion); + return; +} diff --git a/src/sna/sna_damage.c b/src/sna/sna_damage.c new file mode 100644 index 00000000..21af2d0a --- /dev/null +++ b/src/sna/sna_damage.c @@ -0,0 +1,944 @@ +/************************************************************************** + +Copyright (c) 2011 Intel Corporation + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR +ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, +TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE +SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **************************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_damage.h" + +#if DEBUG_DAMAGE +#undef DBG +#define DBG(x) ErrorF x + +static const char *_debug_describe_region(char *buf, int max, + RegionPtr region) +{ + BoxPtr extents; + BoxPtr box; + int n; + int len; + + if (region == NULL) + return "nil"; + + n = REGION_NUM_RECTS(region); + if (n == 0) + return "[0]"; + + extents = REGION_EXTENTS(NULL, region); + if (n == 1) { + sprintf(buf, + "[(%d, %d), (%d, %d)]", + extents->x1, extents->y1, + extents->x2, extents->y2); + return buf; + } + + len = sprintf(buf, + "[(%d, %d), (%d, %d) x %d: ", + extents->x1, extents->y1, + extents->x2, extents->y2, + n) + 3; + max -= 2; + box = REGION_RECTS(region); + while (n--) { + char tmp[80]; + int this; + + this = snprintf(tmp, sizeof(tmp), + "((%d, %d), (%d, %d))%s", + box->x1, box->y1, + box->x2, box->y2, + n ? ", ..." : ""); + box++; + + if (this > max - len) + break; + + len -= 3; + memcpy(buf + len, tmp, this); + len += this; + } + buf[len++] = ']'; + buf[len] = '\0'; + return buf; +} + +static const char *_debug_describe_damage(char *buf, int max, + struct sna_damage *damage) +{ + char damage_str[500], region_str[500]; + int str_max; + + if (damage == NULL) + return "None"; + + str_max = max/2 - 6; + if (str_max > sizeof(damage_str)) + str_max = sizeof(damage_str); + + sprintf(damage_str, "[%d : ...]", damage->n); + snprintf(buf, max, "[[(%d, %d), (%d, %d)]: %s + %s]", + damage->extents.x1, damage->extents.y1, + damage->extents.x2, damage->extents.y2, + _debug_describe_region(region_str, str_max, + &damage->region), + damage_str); + + return buf; +} + +#endif + +struct sna_damage_box { + struct list list; + uint16_t size, remain; +}; + +struct sna_damage_elt { + enum mode { + ADD, + SUBTRACT, + } mode; + BoxPtr box; + uint16_t n; +}; + +static struct sna_damage *_sna_damage_create(void) +{ + struct sna_damage 
*damage; + + damage = malloc(sizeof(*damage)); + damage->n = 0; + damage->size = 16; + damage->elts = malloc(sizeof(*damage->elts) * damage->size); + list_init(&damage->boxes); + damage->last_box = NULL; + damage->mode = ADD; + pixman_region_init(&damage->region); + damage->extents.x1 = damage->extents.y1 = MAXSHORT; + damage->extents.x2 = damage->extents.y2 = MINSHORT; + + return damage; +} + +static BoxPtr _sna_damage_create_boxes(struct sna_damage *damage, + int count) +{ + struct sna_damage_box *box; + int n; + + if (damage->last_box && damage->last_box->remain >= count) { + box = damage->last_box; + n = box->size - box->remain; + DBG((" %s(%d): reuse last box, used=%d, remain=%d\n", + __FUNCTION__, count, n, box->remain)); + box->remain -= count; + if (box->remain == 0) + damage->last_box = NULL; + return (BoxPtr)(box+1) + n; + } + + n = ALIGN(count, 64); + + DBG((" %s(%d->%d): new\n", __FUNCTION__, count, n)); + + box = malloc(sizeof(*box) + sizeof(BoxRec)*n); + box->size = n; + box->remain = n - count; + list_add(&box->list, &damage->boxes); + + damage->last_box = box; + return (BoxPtr)(box+1); +} + +static void +_sna_damage_create_elt(struct sna_damage *damage, + enum mode mode, + const BoxRec *boxes, int count) +{ + struct sna_damage_elt *elt; + + DBG((" %s(%s): n=%d, prev=(%s, remain %d)\n", __FUNCTION__, + mode == ADD ? "add" : "subtract", + damage->n, + damage->n ? damage->elts[damage->n-1].mode == ADD ? "add" : "subtract" : "none", + damage->last_box ? 
damage->last_box->remain : 0)); + + if (damage->last_box && damage->elts[damage->n-1].mode == mode) { + int n; + + n = count; + if (n > damage->last_box->remain) + n = damage->last_box->remain; + + elt = damage->elts + damage->n-1; + memcpy(elt->box + elt->n, boxes, n * sizeof(BoxRec)); + elt->n += n; + damage->last_box->remain -= n; + if (damage->last_box->remain == 0) + damage->last_box = NULL; + + count -=n; + boxes += n; + if (count == 0) + return; + } + + if (damage->n == damage->size) { + int newsize = damage->size * 2; + struct sna_damage_elt *newelts = realloc(damage->elts, + newsize*sizeof(*elt)); + if (newelts == NULL) + return; + + damage->elts = newelts; + damage->size = newsize; + } + + DBG((" %s(): new elt\n", __FUNCTION__)); + + elt = damage->elts + damage->n++; + elt->mode = mode; + elt->n = count; + elt->box = memcpy(_sna_damage_create_boxes(damage, count), + boxes, count * sizeof(BoxRec)); +} + +static void free_list(struct list *head) +{ + while (!list_is_empty(head)) { + struct list *l = head->next; + list_del(l); + free(l); + } +} + +static void __sna_damage_reduce(struct sna_damage *damage) +{ + int n, m, j; + int nboxes; + BoxPtr boxes; + pixman_region16_t tmp, *region = &damage->region; + + DBG((" reduce: before damage.n=%d region.n=%d\n", + damage->n, REGION_NUM_RECTS(region))); + + m = 0; + nboxes = damage->elts[0].n; + boxes = damage->elts[0].box; + for (n = 1; n < damage->n; n++) { + if (damage->elts[n].mode != damage->elts[m].mode) { + if (!boxes) { + boxes = malloc(sizeof(BoxRec)*nboxes); + nboxes = 0; + for (j = m; j < n; j++) { + memcpy(boxes+nboxes, + damage->elts[j].box, + damage->elts[j].n*sizeof(BoxRec)); + nboxes += damage->elts[j].n; + } + } + + pixman_region_init_rects(&tmp, boxes, nboxes); + if (damage->elts[m].mode == ADD) + pixman_region_union(region, region, &tmp); + else + pixman_region_subtract(region, region, &tmp); + pixman_region_fini(&tmp); + + if (boxes != damage->elts[m].box) + free(boxes); + + m = n; + boxes = 
damage->elts[n].box; + nboxes = damage->elts[n].n; + } else { + boxes = NULL; + nboxes += damage->elts[n].n; + } + } + + if (!boxes) { + boxes = malloc(sizeof(BoxRec)*nboxes); + nboxes = 0; + for (j = m; j < n; j++) { + memcpy(boxes+nboxes, + damage->elts[j].box, + damage->elts[j].n*sizeof(BoxRec)); + nboxes += damage->elts[j].n; + } + } + + pixman_region_init_rects(&tmp, boxes, nboxes); + if (damage->elts[m].mode == ADD) + pixman_region_union(region, region, &tmp); + else + pixman_region_subtract(region, region, &tmp); + pixman_region_fini(&tmp); + + damage->extents = region->extents; + + if (boxes != damage->elts[m].box) + free(boxes); + + damage->n = 0; + free_list(&damage->boxes); + damage->last_box = NULL; + damage->mode = ADD; + + DBG((" reduce: after region.n=%d\n", REGION_NUM_RECTS(region))); +} + +inline static struct sna_damage *__sna_damage_add(struct sna_damage *damage, + RegionPtr region) +{ + if (!RegionNotEmpty(region)) + return damage; + + if (!damage) + damage = _sna_damage_create(); + + if (damage->mode == SUBTRACT) + __sna_damage_reduce(damage); + damage->mode = ADD; + + if (REGION_NUM_RECTS(&damage->region) <= 1) { + pixman_region_union(&damage->region, &damage->region, region); + damage->extents = damage->region.extents; + return damage; + } + + if (pixman_region_contains_rectangle(&damage->region, + ®ion->extents) == PIXMAN_REGION_IN) + return damage; + + _sna_damage_create_elt(damage, ADD, + REGION_RECTS(region), + REGION_NUM_RECTS(region)); + + if (damage->extents.x1 > region->extents.x1) + damage->extents.x1 = region->extents.x1; + if (damage->extents.x2 < region->extents.x2) + damage->extents.x2 = region->extents.x2; + + if (damage->extents.y1 > region->extents.y1) + damage->extents.y1 = region->extents.y1; + if (damage->extents.y2 < region->extents.y2) + damage->extents.y2 = region->extents.y2; + + return damage; +} + +#if DEBUG_DAMAGE +fastcall struct sna_damage *_sna_damage_add(struct sna_damage *damage, + RegionPtr region) +{ + char 
region_buf[120]; + char damage_buf[1000]; + + DBG(("%s(%s + %s)\n", __FUNCTION__, + _debug_describe_damage(damage_buf, sizeof(damage_buf), damage), + _debug_describe_region(region_buf, sizeof(region_buf), region))); + + damage = __sna_damage_add(damage, region); + + ErrorF(" = %s\n", + _debug_describe_damage(damage_buf, sizeof(damage_buf), damage)); + + return damage; +} +#else +fastcall struct sna_damage *_sna_damage_add(struct sna_damage *damage, + RegionPtr region) +{ + return __sna_damage_add(damage, region); +} +#endif + +inline static struct sna_damage *__sna_damage_add_box(struct sna_damage *damage, + const BoxRec *box) +{ + if (box->y2 <= box->y1 || box->x2 <= box->x1) + return damage; + + if (!damage) + damage = _sna_damage_create(); + + if (damage->mode == SUBTRACT) + __sna_damage_reduce(damage); + damage->mode = ADD; + + if (REGION_NUM_RECTS(&damage->region) == 0) { + pixman_region_init_rects(&damage->region, box, 1); + damage->extents = *box; + return damage; + } + + if (pixman_region_contains_rectangle(&damage->region, + (BoxPtr)box) == PIXMAN_REGION_IN) + return damage; + + _sna_damage_create_elt(damage, ADD, box, 1); + + if (damage->extents.x1 > box->x1) + damage->extents.x1 = box->x1; + if (damage->extents.x2 < box->x2) + damage->extents.x2 = box->x2; + + if (damage->extents.y1 > box->y1) + damage->extents.y1 = box->y1; + if (damage->extents.y2 < box->y2) + damage->extents.y2 = box->y2; + + return damage; +} + +#if DEBUG_DAMAGE +fastcall struct sna_damage *_sna_damage_add_box(struct sna_damage *damage, + const BoxRec *box) +{ + char damage_buf[1000]; + + DBG(("%s(%s + [(%d, %d), (%d, %d)])\n", __FUNCTION__, + _debug_describe_damage(damage_buf, sizeof(damage_buf), damage), + box->x1, box->y1, box->x2, box->y2)); + + damage = __sna_damage_add_box(damage, box); + + ErrorF(" = %s\n", + _debug_describe_damage(damage_buf, sizeof(damage_buf), damage)); + + return damage; +} +#else +fastcall struct sna_damage *_sna_damage_add_box(struct sna_damage *damage, 
+ const BoxRec *box) +{ + return __sna_damage_add_box(damage, box); +} +#endif + +struct sna_damage *_sna_damage_all(struct sna_damage *damage, + int width, int height) +{ + DBG(("%s(%d, %d)\n", __FUNCTION__, width, height)); + + if (damage) { + free_list(&damage->boxes); + pixman_region_fini(&damage->region); + damage->n = 0; + damage->last_box = NULL; + } else + damage = _sna_damage_create(); + + pixman_region_init_rect(&damage->region, 0, 0, width, height); + damage->extents = damage->region.extents; + damage->mode = ADD; + + return damage; +} + +static inline Bool sna_damage_maybe_contains_box(struct sna_damage *damage, + const BoxRec *box) +{ + if (box->x2 <= damage->extents.x1 || + box->x1 >= damage->extents.x2) + return FALSE; + + if (box->y2 <= damage->extents.y1 || + box->y1 >= damage->extents.y2) + return FALSE; + + return TRUE; +} + +static struct sna_damage *__sna_damage_subtract(struct sna_damage *damage, + RegionPtr region) +{ + if (damage == NULL) + return NULL; + + if (!RegionNotEmpty(&damage->region)) { + __sna_damage_destroy(damage); + return NULL; + } + + if (!RegionNotEmpty(region)) + return damage; + + if (!sna_damage_maybe_contains_box(damage, ®ion->extents)) + return damage; + + if (damage->n == 0) { + if (pixman_region_equal(region, &damage->region)) { + __sna_damage_destroy(damage); + return NULL; + } + + if (!pixman_region_not_empty(&damage->region)) { + __sna_damage_destroy(damage); + return NULL; + } + + if (REGION_NUM_RECTS(&damage->region) == 1 && + REGION_NUM_RECTS(region) == 1) { + pixman_region_subtract(&damage->region, + &damage->region, + region); + damage->extents = damage->region.extents; + return damage; + } + } + + damage->mode = SUBTRACT; + _sna_damage_create_elt(damage, SUBTRACT, + REGION_RECTS(region), + REGION_NUM_RECTS(region)); + + return damage; +} + +#if DEBUG_DAMAGE +fastcall struct sna_damage *_sna_damage_subtract(struct sna_damage *damage, + RegionPtr region) +{ + char damage_buf[1000]; + char region_buf[120]; + + 
ErrorF("%s(%s - %s)...\n", __FUNCTION__, + _debug_describe_damage(damage_buf, sizeof(damage_buf), damage), + _debug_describe_region(region_buf, sizeof(region_buf), region)); + + damage = __sna_damage_subtract(damage, region); + + ErrorF(" = %s\n", + _debug_describe_damage(damage_buf, sizeof(damage_buf), damage)); + + return damage; +} +#else +fastcall struct sna_damage *_sna_damage_subtract(struct sna_damage *damage, + RegionPtr region) +{ + return __sna_damage_subtract(damage, region); +} +#endif + +inline static struct sna_damage *__sna_damage_subtract_box(struct sna_damage *damage, + const BoxRec *box) +{ + if (damage == NULL) + return NULL; + + if (!RegionNotEmpty(&damage->region)) { + __sna_damage_destroy(damage); + return NULL; + } + + if (!sna_damage_maybe_contains_box(damage, box)) + return damage; + + if (damage->n == 0) { + if (!pixman_region_not_empty(&damage->region)) { + __sna_damage_destroy(damage); + return NULL; + } + + if (REGION_NUM_RECTS(&damage->region) == 1) { + pixman_region16_t region; + + pixman_region_init_rects(®ion, box, 1); + pixman_region_subtract(&damage->region, + &damage->region, + ®ion); + damage->extents = damage->region.extents; + return damage; + } + } + + damage->mode = SUBTRACT; + _sna_damage_create_elt(damage, SUBTRACT, box, 1); + + return damage; +} + +#if DEBUG_DAMAGE +fastcall struct sna_damage *_sna_damage_subtract_box(struct sna_damage *damage, + const BoxRec *box) +{ + char damage_buf[1000]; + + ErrorF("%s(%s - (%d, %d), (%d, %d))...\n", __FUNCTION__, + _debug_describe_damage(damage_buf, sizeof(damage_buf), damage), + box->x1, box->y1, box->x2, box->y2); + + damage = __sna_damage_subtract_box(damage, box); + + ErrorF(" = %s\n", + _debug_describe_damage(damage_buf, sizeof(damage_buf), damage)); + + return damage; +} +#else +fastcall struct sna_damage *_sna_damage_subtract_box(struct sna_damage *damage, + const BoxRec *box) +{ + return __sna_damage_subtract_box(damage, box); +} +#endif + +static int 
_sna_damage_contains_box(struct sna_damage *damage, + const BoxPtr box) +{ + if (!damage) + return PIXMAN_REGION_OUT;; + + if (!sna_damage_maybe_contains_box(damage, box)) + return PIXMAN_REGION_OUT; + + if (damage->n) + __sna_damage_reduce(damage); + + return pixman_region_contains_rectangle(&damage->region, box); +} + +#if DEBUG_DAMAGE +int sna_damage_contains_box(struct sna_damage *damage, + const BoxPtr box) +{ + char damage_buf[1000]; + int ret; + + DBG(("%s(%s, [(%d, %d), (%d, %d)])\n", __FUNCTION__, + _debug_describe_damage(damage_buf, sizeof(damage_buf), damage), + box->x1, box->y1, box->x2, box->y2)); + + ret = _sna_damage_contains_box(damage, box); + ErrorF(" = %d\n", ret); + + return ret; +} +#else +int sna_damage_contains_box(struct sna_damage *damage, + const BoxPtr box) +{ + return _sna_damage_contains_box(damage, box); +} +#endif + +static Bool _sna_damage_intersect(struct sna_damage *damage, + RegionPtr region, RegionPtr result) +{ + if (!damage) + return FALSE; + + if (region->extents.x2 <= damage->extents.x1 || + region->extents.x1 >= damage->extents.x2) + return FALSE; + + if (region->extents.y2 <= damage->extents.y1 || + region->extents.y1 >= damage->extents.y2) + return FALSE; + + if (damage->n) + __sna_damage_reduce(damage); + + if (!pixman_region_not_empty(&damage->region)) + return FALSE; + + RegionNull(result); + RegionIntersect(result, &damage->region, region); + + return RegionNotEmpty(result); +} + +#if DEBUG_DAMAGE +Bool sna_damage_intersect(struct sna_damage *damage, + RegionPtr region, RegionPtr result) +{ + char damage_buf[1000]; + char region_buf[120]; + Bool ret; + + ErrorF("%s(%s, %s)...\n", __FUNCTION__, + _debug_describe_damage(damage_buf, sizeof(damage_buf), damage), + _debug_describe_region(region_buf, sizeof(region_buf), region)); + + ret = _sna_damage_intersect(damage, region, result); + ErrorF(" = %d %s\n", + ret, + _debug_describe_region(region_buf, sizeof(region_buf), result)); + + return ret; +} +#else +Bool 
sna_damage_intersect(struct sna_damage *damage, + RegionPtr region, RegionPtr result) +{ + return _sna_damage_intersect(damage, region, result); +} +#endif + +static int _sna_damage_get_boxes(struct sna_damage *damage, BoxPtr *boxes) +{ + if (!damage) + return 0; + + if (damage->n) + __sna_damage_reduce(damage); + + *boxes = REGION_RECTS(&damage->region); + return REGION_NUM_RECTS(&damage->region); +} + +struct sna_damage *_sna_damage_reduce(struct sna_damage *damage) +{ + DBG(("%s()\n", __FUNCTION__)); + + if (damage->n) + __sna_damage_reduce(damage); + + if (!pixman_region_not_empty(&damage->region)) { + __sna_damage_destroy(damage); + damage = NULL; + } + + return damage; +} + +#if DEBUG_DAMAGE +int sna_damage_get_boxes(struct sna_damage *damage, BoxPtr *boxes) +{ + char damage_buf[1000]; + int count; + + ErrorF("%s(%s)...\n", __FUNCTION__, + _debug_describe_damage(damage_buf, sizeof(damage_buf), damage)); + + count = _sna_damage_get_boxes(damage, boxes); + ErrorF(" = %d\n", count); + + return count; +} +#else +int sna_damage_get_boxes(struct sna_damage *damage, BoxPtr *boxes) +{ + return _sna_damage_get_boxes(damage, boxes); +} +#endif + +void __sna_damage_destroy(struct sna_damage *damage) +{ + free(damage->elts); + + free_list(&damage->boxes); + + pixman_region_fini(&damage->region); + free(damage); +} + +#if DEBUG_DAMAGE && TEST_DAMAGE +struct sna_damage_selftest{ + int width, height; +}; + +static void st_damage_init_random_box(struct sna_damage_selftest *test, + BoxPtr box) +{ + int x, y, w, h; + + if (test->width == 1) { + x = 0, w = 1; + } else { + x = rand() % (test->width - 1); + w = 1 + rand() % (test->width - x - 1); + } + + if (test->height == 1) { + y = 0, h = 1; + } else { + y = rand() % (test->height - 1); + h = 1 + rand() % (test->height - y - 1); + } + + box->x1 = x; + box->x2 = x+w; + + box->y1 = y; + box->y2 = y+h; +} + +static void st_damage_init_random_region1(struct sna_damage_selftest *test, + pixman_region16_t *region) +{ + int x, y, w, 
h; + + if (test->width == 1) { + x = 0, w = 1; + } else { + x = rand() % (test->width - 1); + w = 1 + rand() % (test->width - x - 1); + } + + if (test->height == 1) { + y = 0, h = 1; + } else { + y = rand() % (test->height - 1); + h = 1 + rand() % (test->height - y - 1); + } + + pixman_region_init_rect(region, x, y, w, h); +} + +static void st_damage_add(struct sna_damage_selftest *test, + struct sna_damage **damage, + pixman_region16_t *region) +{ + pixman_region16_t tmp; + + st_damage_init_random_region1(test, &tmp); + + sna_damage_add(damage, &tmp); + pixman_region_union(region, region, &tmp); +} + +static void st_damage_add_box(struct sna_damage_selftest *test, + struct sna_damage **damage, + pixman_region16_t *region) +{ + BoxRec box; + + st_damage_init_random_box(test, &box); + + sna_damage_add_box(damage, &box); + pixman_region_union_rectangle(region, region, + box.x1, box.y2, + box.x2 - box.x1, + box.y2 - box.y1); +} + +static void st_damage_subtract(struct sna_damage_selftest *test, + struct sna_damage **damage, + pixman_region16_t *region) +{ + pixman_region16_t tmp; + + st_damage_init_random_region1(test, &tmp); + + sna_damage_subtract(damage, &tmp); + pixman_region_subtract(region, region, &tmp); +} + +static void st_damage_all(struct sna_damage_selftest *test, + struct sna_damage **damage, + pixman_region16_t *region) +{ + pixman_region16_t tmp; + + pixman_region_init_rect(&tmp, 0, 0, test->width, test->height); + + sna_damage_all(damage, test->width, test->height); + pixman_region_union(region, region, &tmp); +} + +static bool st_check_equal(struct sna_damage_selftest *test, + struct sna_damage **damage, + pixman_region16_t *region) +{ + int d_num, r_num; + BoxPtr d_boxes, r_boxes; + + d_num = sna_damage_get_boxes(*damage, &d_boxes); + r_boxes = pixman_region_rectangles(region, &r_num); + + if (d_num != r_num) { + ErrorF("%s: damage and ref contain different number of rectangles\n", + __FUNCTION__); + return FALSE; + } + + if (memcmp(d_boxes, r_boxes, 
d_num*sizeof(BoxRec))) { + ErrorF("%s: damage and ref contain different rectangles\n", + __FUNCTION__); + return FALSE; + } + + return TRUE; +} + +void sna_damage_selftest(void) +{ + void (*const op[])(struct sna_damage_selftest *test, + struct sna_damage **damage, + pixman_region16_t *region) = { + st_damage_add, + st_damage_add_box, + st_damage_subtract, + st_damage_all + }; + bool (*const check[])(struct sna_damage_selftest *test, + struct sna_damage **damage, + pixman_region16_t *region) = { + st_check_equal, + //st_check_contains, + }; + char region_buf[120]; + char damage_buf[1000]; + int pass; + + for (pass = 0; pass < 1024; pass++) { + struct sna_damage_selftest test; + struct sna_damage *damage; + pixman_region16_t ref; + int iter, i; + + iter = rand() % 1024; + + test.width = 1 + rand() % 2048; + test.height = 1 + rand() % 2048; + + damage = _sna_damage_create(); + pixman_region_init(&ref); + + for (i = 0; i < iter; i++) { + op[rand() % ARRAY_SIZE(op)](&test, &damage, &ref); + } + + if (!check[rand() % ARRAY_SIZE(check)](&test, &damage, &ref)) { + ErrorF("%s: failed - region = %s, damage = %s\n", __FUNCTION__, + _debug_describe_region(region_buf, sizeof(region_buf), &ref), + _debug_describe_damage(damage_buf, sizeof(damage_buf), damage)); + assert(0); + } + + pixman_region_fini(&ref); + sna_damage_destroy(&damage); + } +} +#endif diff --git a/src/sna/sna_damage.h b/src/sna/sna_damage.h new file mode 100644 index 00000000..0b335711 --- /dev/null +++ b/src/sna/sna_damage.h @@ -0,0 +1,94 @@ +#ifndef SNA_DAMAGE_H +#define SNA_DAMAGE_H + +#include <regionstr.h> +#include <list.h> + +#define fastcall __attribute__((regparm(3))) + +struct sna_damage_elt; +struct sna_damage_box; + +struct sna_damage { + BoxRec extents; + int n, size, mode; + pixman_region16_t region; + struct sna_damage_elt *elts; + struct sna_damage_box *last_box; + struct list boxes; +}; + +fastcall struct sna_damage *_sna_damage_add(struct sna_damage *damage, + RegionPtr region); +static 
inline void sna_damage_add(struct sna_damage **damage, + RegionPtr region) +{ + *damage = _sna_damage_add(*damage, region); +} + +fastcall struct sna_damage *_sna_damage_add_box(struct sna_damage *damage, + const BoxRec *box); +static inline void sna_damage_add_box(struct sna_damage **damage, + const BoxRec *box) +{ + *damage = _sna_damage_add_box(*damage, box); +} + +struct sna_damage *_sna_damage_all(struct sna_damage *damage, + int width, int height); +static inline void sna_damage_all(struct sna_damage **damage, + int width, int height) +{ + *damage = _sna_damage_all(*damage, width, height); +} + +fastcall struct sna_damage *_sna_damage_subtract(struct sna_damage *damage, + RegionPtr region); +static inline void sna_damage_subtract(struct sna_damage **damage, + RegionPtr region) +{ + *damage = _sna_damage_subtract(*damage, region); +} + +fastcall struct sna_damage *_sna_damage_subtract_box(struct sna_damage *damage, + const BoxRec *box); +static inline void sna_damage_subtract_box(struct sna_damage **damage, + BoxPtr box) +{ + *damage = _sna_damage_subtract_box(*damage, box); +} + +Bool sna_damage_intersect(struct sna_damage *damage, + RegionPtr region, RegionPtr result); + +int sna_damage_contains_box(struct sna_damage *damage, + const BoxPtr box); + +int sna_damage_get_boxes(struct sna_damage *damage, BoxPtr *boxes); + +struct sna_damage *_sna_damage_reduce(struct sna_damage *damage); +static inline void sna_damage_reduce(struct sna_damage **damage) +{ + if (*damage == NULL) + return; + + *damage = _sna_damage_reduce(*damage); +} + +void __sna_damage_destroy(struct sna_damage *damage); +static inline void sna_damage_destroy(struct sna_damage **damage) +{ + if (*damage == NULL) + return; + + __sna_damage_destroy(*damage); + *damage = NULL; +} + +#if DEBUG_DAMAGE && TEST_DAMAGE +void sna_damage_selftest(void); +#else +static inline void sna_damage_selftest(void) {} +#endif + +#endif /* SNA_DAMAGE_H */ diff --git a/src/sna/sna_display.c b/src/sna/sna_display.c 
new file mode 100644 index 00000000..d27eafde --- /dev/null +++ b/src/sna/sna_display.c @@ -0,0 +1,1656 @@ +/* + * Copyright © 2007 Red Hat, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Dave Airlie <airlied@redhat.com> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdint.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <unistd.h> +#include <errno.h> +#include <poll.h> + +#include <xorgVersion.h> +#include <X11/Xatom.h> + +#include "sna.h" + +#if DEBUG_DISPLAY +#undef DBG +#define DBG(x) ErrorF x +#endif + +struct sna_crtc { + drmModeModeInfo kmode; + drmModeCrtcPtr mode_crtc; + PixmapPtr shadow; + uint32_t shadow_fb_id; + uint32_t cursor; + xf86CrtcPtr crtc; + int pipe; + int active; + struct list link; +}; + +struct sna_property { + drmModePropertyPtr mode_prop; + uint64_t value; + int num_atoms; /* if range prop, num_atoms == 1; if enum prop, num_atoms == num_enums + 1 */ + Atom *atoms; +}; + +struct sna_output { + struct sna_mode *mode; + int output_id; + drmModeConnectorPtr mode_output; + drmModeEncoderPtr mode_encoder; + int num_props; + struct sna_property *props; + void *private_data; + + Bool has_panel_limits; + int panel_hdisplay; + int panel_vdisplay; + + int dpms_mode; + const char *backlight_iface; + int backlight_active_level; + int backlight_max; + xf86OutputPtr output; + struct list link; +}; + +static void +sna_output_dpms(xf86OutputPtr output, int mode); + +#define BACKLIGHT_CLASS "/sys/class/backlight" + +/* + * List of available kernel interfaces in priority order + */ +static const char *backlight_interfaces[] = { + "sna", /* prefer our own native backlight driver */ + "asus-laptop", + "eeepc", + "thinkpad_screen", + "mbp_backlight", + "fujitsu-laptop", + "sony", + "samsung", + "acpi_video1", /* finally fallback to the generic acpi drivers */ + "acpi_video0", + NULL, +}; +/* + * Must be long enough for BACKLIGHT_CLASS + '/' + longest in above table + + * '/' + "max_backlight" + */ +#define BACKLIGHT_PATH_LEN 80 +/* Enough for 10 digits of backlight + '\n' + '\0' */ +#define BACKLIGHT_VALUE_LEN 12 + +static inline int +crtc_id(struct sna_crtc *crtc) 
+{ + return crtc->mode_crtc->crtc_id; +} + +int sna_crtc_id(xf86CrtcPtr crtc) +{ + return crtc_id(crtc->driver_private); +} + +int sna_crtc_on(xf86CrtcPtr crtc) +{ + struct sna_crtc *sna_crtc = crtc->driver_private; + return sna_crtc->active; +} + +int sna_crtc_to_pipe(xf86CrtcPtr crtc) +{ + struct sna_crtc *sna_crtc = crtc->driver_private; + return sna_crtc->pipe; +} + +static uint32_t gem_create(int fd, int size) +{ + struct drm_i915_gem_create create; + + create.handle = 0; + create.size = ALIGN(size, 4096); + (void)drmIoctl(fd, DRM_IOCTL_I915_GEM_CREATE, &create); + + return create.handle; +} + +static void gem_close(int fd, uint32_t handle) +{ + struct drm_gem_close close; + + close.handle = handle; + (void)drmIoctl(fd, DRM_IOCTL_GEM_CLOSE, &close); +} + +static void +sna_output_backlight_set(xf86OutputPtr output, int level) +{ + struct sna_output *sna_output = output->driver_private; + char path[BACKLIGHT_PATH_LEN], val[BACKLIGHT_VALUE_LEN]; + int fd, len, ret; + + if (level > sna_output->backlight_max) + level = sna_output->backlight_max; + if (! 
sna_output->backlight_iface || level < 0) + return; + + len = snprintf(val, BACKLIGHT_VALUE_LEN, "%d\n", level); + sprintf(path, "%s/%s/brightness", + BACKLIGHT_CLASS, sna_output->backlight_iface); + fd = open(path, O_RDWR); + if (fd == -1) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, "failed to open %s for backlight " + "control: %s\n", path, strerror(errno)); + return; + } + + ret = write(fd, val, len); + if (ret == -1) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, "write to %s for backlight " + "control failed: %s\n", path, strerror(errno)); + } + + close(fd); +} + +static int +sna_output_backlight_get(xf86OutputPtr output) +{ + struct sna_output *sna_output = output->driver_private; + char path[BACKLIGHT_PATH_LEN], val[BACKLIGHT_VALUE_LEN]; + int fd, level; + + sprintf(path, "%s/%s/actual_brightness", + BACKLIGHT_CLASS, sna_output->backlight_iface); + fd = open(path, O_RDONLY); + if (fd == -1) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, "failed to open %s " + "for backlight control: %s\n", path, strerror(errno)); + return -1; + } + + memset(val, 0, sizeof(val)); + if (read(fd, val, BACKLIGHT_VALUE_LEN) == -1) { + close(fd); + return -1; + } + + close(fd); + + level = atoi(val); + if (level > sna_output->backlight_max) + level = sna_output->backlight_max; + if (level < 0) + level = -1; + return level; +} + +static int +sna_output_backlight_get_max(xf86OutputPtr output) +{ + struct sna_output *sna_output = output->driver_private; + char path[BACKLIGHT_PATH_LEN], val[BACKLIGHT_VALUE_LEN]; + int fd, max = 0; + + sprintf(path, "%s/%s/max_brightness", + BACKLIGHT_CLASS, sna_output->backlight_iface); + fd = open(path, O_RDONLY); + if (fd == -1) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, "failed to open %s " + "for backlight control: %s\n", path, strerror(errno)); + return -1; + } + + memset(val, 0, sizeof(val)); + if (read(fd, val, BACKLIGHT_VALUE_LEN) == -1) { + close(fd); + return -1; + } + + close(fd); + + max = atoi(val); + if (max <= 0) + max = 
-1; + return max; +} + +static void +sna_output_backlight_init(xf86OutputPtr output) +{ + struct sna_output *sna_output = output->driver_private; + int i; + + for (i = 0; backlight_interfaces[i] != NULL; i++) { + char path[BACKLIGHT_PATH_LEN]; + struct stat buf; + + sprintf(path, "%s/%s", BACKLIGHT_CLASS, backlight_interfaces[i]); + if (!stat(path, &buf)) { + sna_output->backlight_iface = backlight_interfaces[i]; + sna_output->backlight_max = sna_output_backlight_get_max(output); + if (sna_output->backlight_max > 0) { + sna_output->backlight_active_level = sna_output_backlight_get(output); + xf86DrvMsg(output->scrn->scrnIndex, X_INFO, + "found backlight control interface %s\n", path); + return; + } + } + } + sna_output->backlight_iface = NULL; +} + + +static void +mode_from_kmode(ScrnInfoPtr scrn, + drmModeModeInfoPtr kmode, + DisplayModePtr mode) +{ + memset(mode, 0, sizeof(DisplayModeRec)); + mode->status = MODE_OK; + + mode->Clock = kmode->clock; + + mode->HDisplay = kmode->hdisplay; + mode->HSyncStart = kmode->hsync_start; + mode->HSyncEnd = kmode->hsync_end; + mode->HTotal = kmode->htotal; + mode->HSkew = kmode->hskew; + + mode->VDisplay = kmode->vdisplay; + mode->VSyncStart = kmode->vsync_start; + mode->VSyncEnd = kmode->vsync_end; + mode->VTotal = kmode->vtotal; + mode->VScan = kmode->vscan; + + mode->Flags = kmode->flags; //& FLAG_BITS; + mode->name = strdup(kmode->name); + + if (kmode->type & DRM_MODE_TYPE_DRIVER) + mode->type = M_T_DRIVER; + if (kmode->type & DRM_MODE_TYPE_PREFERRED) + mode->type |= M_T_PREFERRED; + + xf86SetModeCrtc (mode, scrn->adjustFlags); +} + +static void +mode_to_kmode(ScrnInfoPtr scrn, + drmModeModeInfoPtr kmode, + DisplayModePtr mode) +{ + memset(kmode, 0, sizeof(*kmode)); + + kmode->clock = mode->Clock; + kmode->hdisplay = mode->HDisplay; + kmode->hsync_start = mode->HSyncStart; + kmode->hsync_end = mode->HSyncEnd; + kmode->htotal = mode->HTotal; + kmode->hskew = mode->HSkew; + + kmode->vdisplay = mode->VDisplay; + 
kmode->vsync_start = mode->VSyncStart; + kmode->vsync_end = mode->VSyncEnd; + kmode->vtotal = mode->VTotal; + kmode->vscan = mode->VScan; + + kmode->flags = mode->Flags; //& FLAG_BITS; + if (mode->name) + strncpy(kmode->name, mode->name, DRM_DISPLAY_MODE_LEN); + kmode->name[DRM_DISPLAY_MODE_LEN-1] = 0; +} + +static void +sna_crtc_dpms(xf86CrtcPtr crtc, int mode) +{ + struct sna_crtc *sna_crtc = crtc->driver_private; + DBG(("%s(pipe %d, dpms mode -> %d):= active=%d\n", + __FUNCTION__, sna_crtc->pipe, mode, mode == DPMSModeOn)); + sna_crtc->active = mode == DPMSModeOn; +} + +static Bool +sna_crtc_apply(xf86CrtcPtr crtc) +{ + ScrnInfoPtr scrn = crtc->scrn; + struct sna *sna = to_sna(scrn); + struct sna_crtc *sna_crtc = crtc->driver_private; + struct sna_mode *mode = &sna->mode; + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(crtc->scrn); + uint32_t *output_ids; + int output_count = 0; + int fb_id, x, y; + int i, ret = FALSE; + + output_ids = calloc(sizeof(uint32_t), xf86_config->num_output); + if (!output_ids) + return FALSE; + + for (i = 0; i < xf86_config->num_output; i++) { + xf86OutputPtr output = xf86_config->output[i]; + struct sna_output *sna_output; + + if (output->crtc != crtc) + continue; + + sna_output = output->driver_private; + output_ids[output_count] = + sna_output->mode_output->connector_id; + output_count++; + } + + if (!xf86CrtcRotate(crtc)) + goto done; + + crtc->funcs->gamma_set(crtc, crtc->gamma_red, crtc->gamma_green, + crtc->gamma_blue, crtc->gamma_size); + + x = crtc->x; + y = crtc->y; + fb_id = mode->fb_id; + if (sna_crtc->shadow_fb_id) { + fb_id = sna_crtc->shadow_fb_id; + x = 0; + y = 0; + } + ret = drmModeSetCrtc(sna->kgem.fd, crtc_id(sna_crtc), + fb_id, x, y, output_ids, output_count, + &sna_crtc->kmode); + if (ret) { + xf86DrvMsg(crtc->scrn->scrnIndex, X_ERROR, + "failed to set mode: %s\n", strerror(-ret)); + ret = FALSE; + } else + ret = TRUE; + + if (scrn->pScreen) + xf86_reload_cursors(scrn->pScreen); + +done: + 
free(output_ids); + return ret; +} + +static Bool +sna_crtc_set_mode_major(xf86CrtcPtr crtc, DisplayModePtr mode, + Rotation rotation, int x, int y) +{ + ScrnInfoPtr scrn = crtc->scrn; + struct sna *sna = to_sna(scrn); + struct sna_crtc *sna_crtc = crtc->driver_private; + struct sna_mode *sna_mode = &sna->mode; + int saved_x, saved_y; + Rotation saved_rotation; + DisplayModeRec saved_mode; + int ret = TRUE; + + DBG(("%s(rotation=%d, x=%d, y=%d)\n", + __FUNCTION__, rotation, x, y)); + + if (sna_mode->fb_id == 0) { + struct kgem_bo *bo = sna_pixmap_pin(sna->front); + if (!bo) + return FALSE; + + ret = drmModeAddFB(sna->kgem.fd, + scrn->virtualX, scrn->virtualY, + scrn->depth, scrn->bitsPerPixel, + bo->pitch, bo->handle, + &sna_mode->fb_id); + if (ret < 0) { + ErrorF("%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d\n", + __FUNCTION__, + scrn->virtualX, scrn->virtualY, + scrn->depth, scrn->bitsPerPixel, bo->pitch); + return FALSE; + } + + DBG(("%s: handle %d attached to fb %d\n", + __FUNCTION__, bo->handle, sna_mode->fb_id)); + } + + saved_mode = crtc->mode; + saved_x = crtc->x; + saved_y = crtc->y; + saved_rotation = crtc->rotation; + + crtc->mode = *mode; + crtc->x = x; + crtc->y = y; + crtc->rotation = rotation; + + kgem_submit(&sna->kgem); + + mode_to_kmode(scrn, &sna_crtc->kmode, mode); + ret = sna_crtc_apply(crtc); + if (!ret) { + crtc->x = saved_x; + crtc->y = saved_y; + crtc->rotation = saved_rotation; + crtc->mode = saved_mode; + } + + return ret; +} + +static void +sna_crtc_set_cursor_colors(xf86CrtcPtr crtc, int bg, int fg) +{ + +} + +static void +sna_crtc_set_cursor_position (xf86CrtcPtr crtc, int x, int y) +{ + struct sna *sna = to_sna(crtc->scrn); + struct sna_crtc *sna_crtc = crtc->driver_private; + + drmModeMoveCursor(sna->kgem.fd, crtc_id(sna_crtc), x, y); +} + +static void +sna_crtc_load_cursor_argb(xf86CrtcPtr crtc, CARD32 *image) +{ + struct sna *sna = to_sna(crtc->scrn); + struct sna_crtc *sna_crtc = crtc->driver_private; + struct 
drm_i915_gem_pwrite pwrite; + + pwrite.handle = sna_crtc->cursor; + pwrite.offset = 0; + pwrite.size = 64*64*4; + pwrite.data_ptr = (uintptr_t)image; + (void)drmIoctl(sna->kgem.fd, DRM_IOCTL_I915_GEM_PWRITE, &pwrite); +} + +static void +sna_crtc_hide_cursor(xf86CrtcPtr crtc) +{ + struct sna *sna = to_sna(crtc->scrn); + struct sna_crtc *sna_crtc = crtc->driver_private; + + drmModeSetCursor(sna->kgem.fd, crtc_id(sna_crtc), 0, 64, 64); +} + +static void +sna_crtc_show_cursor(xf86CrtcPtr crtc) +{ + struct sna *sna = to_sna(crtc->scrn); + struct sna_crtc *sna_crtc = crtc->driver_private; + + drmModeSetCursor(sna->kgem.fd, crtc_id(sna_crtc), + sna_crtc->cursor, 64, 64); +} + +static void * +sna_crtc_shadow_allocate(xf86CrtcPtr crtc, int width, int height) +{ + ScrnInfoPtr scrn = crtc->scrn; + struct sna *sna = to_sna(scrn); + struct sna_crtc *sna_crtc = crtc->driver_private; + PixmapPtr shadow; + struct kgem_bo *bo; + + DBG(("%s(%d, %d)\n", __FUNCTION__, width, height)); + + shadow = scrn->pScreen->CreatePixmap(scrn->pScreen, width, height, scrn->depth, 0); + if (!shadow) + return NULL; + + bo = sna_pixmap_pin(shadow); + if (!bo) { + scrn->pScreen->DestroyPixmap(shadow); + return NULL; + } + + if (drmModeAddFB(sna->kgem.fd, + width, height, scrn->depth, scrn->bitsPerPixel, + bo->pitch, bo->handle, + &sna_crtc->shadow_fb_id)) { + ErrorF("%s: failed to add rotate fb: %dx%d depth=%d, bpp=%d, pitch=%d\n", + __FUNCTION__, + width, height, + scrn->depth, scrn->bitsPerPixel, bo->pitch); + scrn->pScreen->DestroyPixmap(shadow); + return NULL; + } + + DBG(("%s: attached handle %d to fb %d\n", + __FUNCTION__, bo->handle, sna_crtc->shadow_fb_id)); + return sna_crtc->shadow = shadow; +} + +static PixmapPtr +sna_crtc_shadow_create(xf86CrtcPtr crtc, void *data, int width, int height) +{ + return data; +} + +static void +sna_crtc_shadow_destroy(xf86CrtcPtr crtc, PixmapPtr pixmap, void *data) +{ + struct sna *sna = to_sna(crtc->scrn); + struct sna_crtc *sna_crtc = crtc->driver_private; + 
+ DBG(("%s(fb=%d, handle=%d)\n", __FUNCTION__, + sna_crtc->shadow_fb_id, sna_pixmap_get_bo(pixmap)->handle)); + + drmModeRmFB(sna->kgem.fd, sna_crtc->shadow_fb_id); + sna_crtc->shadow_fb_id = 0; + + pixmap->drawable.pScreen->DestroyPixmap(pixmap); + sna_crtc->shadow = NULL; +} + +static void +sna_crtc_gamma_set(xf86CrtcPtr crtc, + CARD16 *red, CARD16 *green, CARD16 *blue, int size) +{ + struct sna *sna = to_sna(crtc->scrn); + struct sna_crtc *sna_crtc = crtc->driver_private; + + drmModeCrtcSetGamma(sna->kgem.fd, crtc_id(sna_crtc), + size, red, green, blue); +} + +static void +sna_crtc_destroy(xf86CrtcPtr crtc) +{ + struct sna *sna = to_sna(crtc->scrn); + struct sna_crtc *sna_crtc = crtc->driver_private; + + drmModeSetCursor(sna->kgem.fd, crtc_id(sna_crtc), 0, 64, 64); + gem_close(sna->kgem.fd, sna_crtc->cursor); + + list_del(&sna_crtc->link); + free(sna_crtc); + + crtc->driver_private = NULL; +} + +static const xf86CrtcFuncsRec sna_crtc_funcs = { + .dpms = sna_crtc_dpms, + .set_mode_major = sna_crtc_set_mode_major, + .set_cursor_colors = sna_crtc_set_cursor_colors, + .set_cursor_position = sna_crtc_set_cursor_position, + .show_cursor = sna_crtc_show_cursor, + .hide_cursor = sna_crtc_hide_cursor, + .load_cursor_argb = sna_crtc_load_cursor_argb, + .shadow_create = sna_crtc_shadow_create, + .shadow_allocate = sna_crtc_shadow_allocate, + .shadow_destroy = sna_crtc_shadow_destroy, + .gamma_set = sna_crtc_gamma_set, + .destroy = sna_crtc_destroy, +}; + +static void +sna_crtc_init(ScrnInfoPtr scrn, struct sna_mode *mode, int num) +{ + struct sna *sna = to_sna(scrn); + xf86CrtcPtr crtc; + struct sna_crtc *sna_crtc; + struct drm_i915_get_pipe_from_crtc_id get_pipe; + + sna_crtc = calloc(sizeof(struct sna_crtc), 1); + if (sna_crtc == NULL) + return; + + crtc = xf86CrtcCreate(scrn, &sna_crtc_funcs); + if (crtc == NULL) { + free(sna_crtc); + return; + } + + sna_crtc->mode_crtc = drmModeGetCrtc(sna->kgem.fd, + mode->mode_res->crtcs[num]); + get_pipe.pipe = 0; + get_pipe.crtc_id 
= sna_crtc->mode_crtc->crtc_id; + drmIoctl(sna->kgem.fd, + DRM_IOCTL_I915_GET_PIPE_FROM_CRTC_ID, + &get_pipe); + sna_crtc->pipe = get_pipe.pipe; + + crtc->driver_private = sna_crtc; + + sna_crtc->cursor = gem_create(sna->kgem.fd, 64*64*4); + + sna_crtc->crtc = crtc; + list_add(&sna_crtc->link, &mode->crtcs); +} + +static Bool +is_panel(int type) +{ + return (type == DRM_MODE_CONNECTOR_LVDS || + type == DRM_MODE_CONNECTOR_eDP); +} + +static xf86OutputStatus +sna_output_detect(xf86OutputPtr output) +{ + /* go to the hw and retrieve a new output struct */ + struct sna *sna = to_sna(output->scrn); + struct sna_output *sna_output = output->driver_private; + xf86OutputStatus status; + + drmModeFreeConnector(sna_output->mode_output); + sna_output->mode_output = + drmModeGetConnector(sna->kgem.fd, sna_output->output_id); + + switch (sna_output->mode_output->connection) { + case DRM_MODE_CONNECTED: + status = XF86OutputStatusConnected; + break; + case DRM_MODE_DISCONNECTED: + status = XF86OutputStatusDisconnected; + break; + default: + case DRM_MODE_UNKNOWNCONNECTION: + status = XF86OutputStatusUnknown; + break; + } + return status; +} + +static Bool +sna_output_mode_valid(xf86OutputPtr output, DisplayModePtr pModes) +{ + struct sna_output *sna_output = output->driver_private; + + /* + * If the connector type is a panel, we will use the panel limit to + * verfiy whether the mode is valid. 
+ */ + if (sna_output->has_panel_limits) { + if (pModes->HDisplay > sna_output->panel_hdisplay || + pModes->VDisplay > sna_output->panel_vdisplay) + return MODE_PANEL; + } + + return MODE_OK; +} + +static void +sna_output_attach_edid(xf86OutputPtr output) +{ + struct sna *sna = to_sna(output->scrn); + struct sna_output *sna_output = output->driver_private; + drmModeConnectorPtr koutput = sna_output->mode_output; + drmModePropertyBlobPtr edid_blob = NULL; + xf86MonPtr mon = NULL; + int i; + + /* look for an EDID property */ + for (i = 0; i < koutput->count_props; i++) { + drmModePropertyPtr props; + + props = drmModeGetProperty(sna->kgem.fd, koutput->props[i]); + if (!props) + continue; + + if (!(props->flags & DRM_MODE_PROP_BLOB)) { + drmModeFreeProperty(props); + continue; + } + + if (!strcmp(props->name, "EDID")) { + drmModeFreePropertyBlob(edid_blob); + edid_blob = + drmModeGetPropertyBlob(sna->kgem.fd, + koutput->prop_values[i]); + } + drmModeFreeProperty(props); + } + + if (edid_blob) { + mon = xf86InterpretEDID(output->scrn->scrnIndex, + edid_blob->data); + + if (mon && edid_blob->length > 128) + mon->flags |= MONITOR_EDID_COMPLETE_RAWDATA; + } + + xf86OutputSetEDID(output, mon); + + if (edid_blob) + drmModeFreePropertyBlob(edid_blob); +} + +static DisplayModePtr +sna_output_panel_edid(xf86OutputPtr output, DisplayModePtr modes) +{ + xf86MonPtr mon = output->MonInfo; + + if (!mon || !GTF_SUPPORTED(mon->features.msc)) { + DisplayModePtr i, m, p = NULL; + int max_x = 0, max_y = 0; + float max_vrefresh = 0.0; + + for (m = modes; m; m = m->next) { + if (m->type & M_T_PREFERRED) + p = m; + max_x = max(max_x, m->HDisplay); + max_y = max(max_y, m->VDisplay); + max_vrefresh = max(max_vrefresh, xf86ModeVRefresh(m)); + } + + max_vrefresh = max(max_vrefresh, 60.0); + max_vrefresh *= (1 + SYNC_TOLERANCE); + + m = xf86GetDefaultModes(); + xf86ValidateModesSize(output->scrn, m, max_x, max_y, 0); + + for (i = m; i; i = i->next) { + if (xf86ModeVRefresh(i) > max_vrefresh) + 
i->status = MODE_VSYNC; + if (p && i->HDisplay >= p->HDisplay && + i->VDisplay >= p->VDisplay && + xf86ModeVRefresh(i) >= xf86ModeVRefresh(p)) + i->status = MODE_VSYNC; + } + + xf86PruneInvalidModes(output->scrn, &m, FALSE); + + modes = xf86ModesAdd(modes, m); + } + + return modes; +} + +static DisplayModePtr +sna_output_get_modes(xf86OutputPtr output) +{ + struct sna_output *sna_output = output->driver_private; + drmModeConnectorPtr koutput = sna_output->mode_output; + DisplayModePtr Modes = NULL; + int i; + + sna_output_attach_edid(output); + + /* modes should already be available */ + for (i = 0; i < koutput->count_modes; i++) { + DisplayModePtr Mode; + + Mode = calloc(1, sizeof(DisplayModeRec)); + if (Mode) { + mode_from_kmode(output->scrn, &koutput->modes[i], Mode); + Modes = xf86ModesAdd(Modes, Mode); + } + } + + /* + * If the connector type is a panel, we will traverse the kernel mode to + * get the panel limit. And then add all the standard modes to fake + * the fullscreen experience. + * If it is incorrect, please fix me. 
+ */ + sna_output->has_panel_limits = FALSE; + if (is_panel(koutput->connector_type)) { + for (i = 0; i < koutput->count_modes; i++) { + drmModeModeInfo *mode_ptr; + + mode_ptr = &koutput->modes[i]; + if (mode_ptr->hdisplay > sna_output->panel_hdisplay) + sna_output->panel_hdisplay = mode_ptr->hdisplay; + if (mode_ptr->vdisplay > sna_output->panel_vdisplay) + sna_output->panel_vdisplay = mode_ptr->vdisplay; + } + + sna_output->has_panel_limits = + sna_output->panel_hdisplay && + sna_output->panel_vdisplay; + + Modes = sna_output_panel_edid(output, Modes); + } + + return Modes; +} + +static void +sna_output_destroy(xf86OutputPtr output) +{ + struct sna_output *sna_output = output->driver_private; + int i; + + for (i = 0; i < sna_output->num_props; i++) { + drmModeFreeProperty(sna_output->props[i].mode_prop); + free(sna_output->props[i].atoms); + } + free(sna_output->props); + + drmModeFreeConnector(sna_output->mode_output); + sna_output->mode_output = NULL; + + list_del(&sna_output->link); + free(sna_output); + + output->driver_private = NULL; +} + +static void +sna_output_dpms_backlight(xf86OutputPtr output, int oldmode, int mode) +{ + struct sna_output *sna_output = output->driver_private; + + if (!sna_output->backlight_iface) + return; + + if (mode == DPMSModeOn) { + /* If we're going from off->on we may need to turn on the backlight. */ + if (oldmode != DPMSModeOn) + sna_output_backlight_set(output, + sna_output->backlight_active_level); + } else { + /* Only save the current backlight value if we're going from on to off. 
*/ + if (oldmode == DPMSModeOn) + sna_output->backlight_active_level = sna_output_backlight_get(output); + sna_output_backlight_set(output, 0); + } +} + +static void +sna_output_dpms(xf86OutputPtr output, int dpms) +{ + struct sna *sna = to_sna(output->scrn); + struct sna_output *sna_output = output->driver_private; + drmModeConnectorPtr koutput = sna_output->mode_output; + int i; + + for (i = 0; i < koutput->count_props; i++) { + drmModePropertyPtr props; + + props = drmModeGetProperty(sna->kgem.fd, koutput->props[i]); + if (!props) + continue; + + if (!strcmp(props->name, "DPMS")) { + drmModeConnectorSetProperty(sna->kgem.fd, + sna_output->output_id, + props->prop_id, + dpms); + sna_output_dpms_backlight(output, + sna_output->dpms_mode, + dpms); + sna_output->dpms_mode = dpms; + drmModeFreeProperty(props); + return; + } + + drmModeFreeProperty(props); + } +} + +int +sna_output_dpms_status(xf86OutputPtr output) +{ + struct sna_output *sna_output = output->driver_private; + return sna_output->dpms_mode; +} + +static Bool +sna_property_ignore(drmModePropertyPtr prop) +{ + if (!prop) + return TRUE; + + /* ignore blob prop */ + if (prop->flags & DRM_MODE_PROP_BLOB) + return TRUE; + + /* ignore standard property */ + if (!strcmp(prop->name, "EDID") || + !strcmp(prop->name, "DPMS")) + return TRUE; + + return FALSE; +} + +#define BACKLIGHT_NAME "Backlight" +#define BACKLIGHT_DEPRECATED_NAME "BACKLIGHT" +static Atom backlight_atom, backlight_deprecated_atom; + +static void +sna_output_create_resources(xf86OutputPtr output) +{ + struct sna *sna = to_sna(output->scrn); + struct sna_output *sna_output = output->driver_private; + drmModeConnectorPtr mode_output = sna_output->mode_output; + int i, j, err; + + sna_output->props = calloc(mode_output->count_props, + sizeof(struct sna_property)); + if (!sna_output->props) + return; + + sna_output->num_props = 0; + for (i = j = 0; i < mode_output->count_props; i++) { + drmModePropertyPtr drmmode_prop; + + drmmode_prop = 
drmModeGetProperty(sna->kgem.fd, + mode_output->props[i]); + if (sna_property_ignore(drmmode_prop)) { + drmModeFreeProperty(drmmode_prop); + continue; + } + + sna_output->props[j].mode_prop = drmmode_prop; + sna_output->props[j].value = mode_output->prop_values[i]; + j++; + } + sna_output->num_props = j; + + for (i = 0; i < sna_output->num_props; i++) { + struct sna_property *p = &sna_output->props[i]; + drmModePropertyPtr drmmode_prop = p->mode_prop; + + if (drmmode_prop->flags & DRM_MODE_PROP_RANGE) { + INT32 range[2]; + + p->num_atoms = 1; + p->atoms = calloc(p->num_atoms, sizeof(Atom)); + if (!p->atoms) + continue; + + p->atoms[0] = MakeAtom(drmmode_prop->name, strlen(drmmode_prop->name), TRUE); + range[0] = drmmode_prop->values[0]; + range[1] = drmmode_prop->values[1]; + err = RRConfigureOutputProperty(output->randr_output, p->atoms[0], + FALSE, TRUE, + drmmode_prop->flags & DRM_MODE_PROP_IMMUTABLE ? TRUE : FALSE, + 2, range); + if (err != 0) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "RRConfigureOutputProperty error, %d\n", err); + } + err = RRChangeOutputProperty(output->randr_output, p->atoms[0], + XA_INTEGER, 32, PropModeReplace, 1, &p->value, FALSE, TRUE); + if (err != 0) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "RRChangeOutputProperty error, %d\n", err); + } + } else if (drmmode_prop->flags & DRM_MODE_PROP_ENUM) { + p->num_atoms = drmmode_prop->count_enums + 1; + p->atoms = calloc(p->num_atoms, sizeof(Atom)); + if (!p->atoms) + continue; + + p->atoms[0] = MakeAtom(drmmode_prop->name, strlen(drmmode_prop->name), TRUE); + for (j = 1; j <= drmmode_prop->count_enums; j++) { + struct drm_mode_property_enum *e = &drmmode_prop->enums[j-1]; + p->atoms[j] = MakeAtom(e->name, strlen(e->name), TRUE); + } + + err = RRConfigureOutputProperty(output->randr_output, p->atoms[0], + FALSE, FALSE, + drmmode_prop->flags & DRM_MODE_PROP_IMMUTABLE ? 
TRUE : FALSE, + p->num_atoms - 1, (INT32 *)&p->atoms[1]); + if (err != 0) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "RRConfigureOutputProperty error, %d\n", err); + } + + for (j = 0; j < drmmode_prop->count_enums; j++) + if (drmmode_prop->enums[j].value == p->value) + break; + /* there's always a matching value */ + err = RRChangeOutputProperty(output->randr_output, p->atoms[0], + XA_ATOM, 32, PropModeReplace, 1, &p->atoms[j+1], FALSE, TRUE); + if (err != 0) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "RRChangeOutputProperty error, %d\n", err); + } + } + } + + if (sna_output->backlight_iface) { + INT32 data, backlight_range[2]; + + /* Set up the backlight property, which takes effect + * immediately and accepts values only within the + * backlight_range. + */ + backlight_atom = MakeAtom(BACKLIGHT_NAME, sizeof(BACKLIGHT_NAME) - 1, TRUE); + backlight_deprecated_atom = MakeAtom(BACKLIGHT_DEPRECATED_NAME, + sizeof(BACKLIGHT_DEPRECATED_NAME) - 1, TRUE); + + backlight_range[0] = 0; + backlight_range[1] = sna_output->backlight_max; + err = RRConfigureOutputProperty(output->randr_output, + backlight_atom, + FALSE, TRUE, FALSE, + 2, backlight_range); + if (err != 0) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "RRConfigureOutputProperty error, %d\n", err); + } + err = RRConfigureOutputProperty(output->randr_output, + backlight_deprecated_atom, + FALSE, TRUE, FALSE, + 2, backlight_range); + if (err != 0) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "RRConfigureOutputProperty error, %d\n", err); + } + /* Set the current value of the backlight property */ + data = sna_output->backlight_active_level; + err = RRChangeOutputProperty(output->randr_output, backlight_atom, + XA_INTEGER, 32, PropModeReplace, 1, &data, + FALSE, TRUE); + if (err != 0) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "RRChangeOutputProperty error, %d\n", err); + } + err = RRChangeOutputProperty(output->randr_output, backlight_deprecated_atom, + XA_INTEGER, 32, 
PropModeReplace, 1, &data, + FALSE, TRUE); + if (err != 0) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "RRChangeOutputProperty error, %d\n", err); + } + } +} + +static Bool +sna_output_set_property(xf86OutputPtr output, Atom property, + RRPropertyValuePtr value) +{ + struct sna *sna = to_sna(output->scrn); + struct sna_output *sna_output = output->driver_private; + int i; + + if (property == backlight_atom || property == backlight_deprecated_atom) { + INT32 val; + + if (value->type != XA_INTEGER || value->format != 32 || + value->size != 1) + { + return FALSE; + } + + val = *(INT32 *)value->data; + if (val < 0 || val > sna_output->backlight_max) + return FALSE; + + if (sna_output->dpms_mode == DPMSModeOn) + sna_output_backlight_set(output, val); + sna_output->backlight_active_level = val; + return TRUE; + } + + for (i = 0; i < sna_output->num_props; i++) { + struct sna_property *p = &sna_output->props[i]; + + if (p->atoms[0] != property) + continue; + + if (p->mode_prop->flags & DRM_MODE_PROP_RANGE) { + uint32_t val; + + if (value->type != XA_INTEGER || value->format != 32 || + value->size != 1) + return FALSE; + val = *(uint32_t *)value->data; + + drmModeConnectorSetProperty(sna->kgem.fd, sna_output->output_id, + p->mode_prop->prop_id, (uint64_t)val); + return TRUE; + } else if (p->mode_prop->flags & DRM_MODE_PROP_ENUM) { + Atom atom; + const char *name; + int j; + + if (value->type != XA_ATOM || value->format != 32 || value->size != 1) + return FALSE; + memcpy(&atom, value->data, 4); + name = NameForAtom(atom); + + /* search for matching name string, then set its value down */ + for (j = 0; j < p->mode_prop->count_enums; j++) { + if (!strcmp(p->mode_prop->enums[j].name, name)) { + drmModeConnectorSetProperty(sna->kgem.fd, sna_output->output_id, + p->mode_prop->prop_id, p->mode_prop->enums[j].value); + return TRUE; + } + } + return FALSE; + } + } + + /* We didn't recognise this property, just report success in order + * to allow the set to continue, 
otherwise we break setting of + * common properties like EDID. + */ + return TRUE; +} + +static Bool +sna_output_get_property(xf86OutputPtr output, Atom property) +{ + struct sna_output *sna_output = output->driver_private; + int err; + + if (property == backlight_atom || property == backlight_deprecated_atom) { + INT32 val; + + if (! sna_output->backlight_iface) + return FALSE; + + val = sna_output_backlight_get(output); + if (val < 0) + return FALSE; + + err = RRChangeOutputProperty(output->randr_output, property, + XA_INTEGER, 32, PropModeReplace, 1, &val, + FALSE, TRUE); + if (err != 0) { + xf86DrvMsg(output->scrn->scrnIndex, X_ERROR, + "RRChangeOutputProperty error, %d\n", err); + return FALSE; + } + + return TRUE; + } + + return FALSE; +} + +static const xf86OutputFuncsRec sna_output_funcs = { + .create_resources = sna_output_create_resources, +#ifdef RANDR_12_INTERFACE + .set_property = sna_output_set_property, + .get_property = sna_output_get_property, +#endif + .dpms = sna_output_dpms, + .detect = sna_output_detect, + .mode_valid = sna_output_mode_valid, + + .get_modes = sna_output_get_modes, + .destroy = sna_output_destroy +}; + +static const int subpixel_conv_table[7] = { + 0, + SubPixelUnknown, + SubPixelHorizontalRGB, + SubPixelHorizontalBGR, + SubPixelVerticalRGB, + SubPixelVerticalBGR, + SubPixelNone +}; + +static const char *output_names[] = { + "None", + "VGA", + "DVI", + "DVI", + "DVI", + "Composite", + "TV", + "LVDS", + "CTV", + "DIN", + "DP", + "HDMI", + "HDMI", + "TV", + "eDP", +}; + +static void +sna_output_init(ScrnInfoPtr scrn, struct sna_mode *mode, int num) +{ + struct sna *sna = to_sna(scrn); + xf86OutputPtr output; + drmModeConnectorPtr koutput; + drmModeEncoderPtr kencoder; + struct sna_output *sna_output; + const char *output_name; + char name[32]; + + koutput = drmModeGetConnector(sna->kgem.fd, + mode->mode_res->connectors[num]); + if (!koutput) + return; + + kencoder = drmModeGetEncoder(sna->kgem.fd, koutput->encoders[0]); + if 
(!kencoder) { + drmModeFreeConnector(koutput); + return; + } + + if (koutput->connector_type < ARRAY_SIZE(output_names)) + output_name = output_names[koutput->connector_type]; + else + output_name = "UNKNOWN"; + snprintf(name, 32, "%s%d", output_name, koutput->connector_type_id); + + output = xf86OutputCreate (scrn, &sna_output_funcs, name); + if (!output) { + drmModeFreeEncoder(kencoder); + drmModeFreeConnector(koutput); + return; + } + + sna_output = calloc(sizeof(struct sna_output), 1); + if (!sna_output) { + xf86OutputDestroy(output); + drmModeFreeConnector(koutput); + drmModeFreeEncoder(kencoder); + return; + } + + sna_output->output_id = mode->mode_res->connectors[num]; + sna_output->mode_output = koutput; + sna_output->mode_encoder = kencoder; + sna_output->mode = mode; + + output->mm_width = koutput->mmWidth; + output->mm_height = koutput->mmHeight; + + output->subpixel_order = subpixel_conv_table[koutput->subpixel]; + output->driver_private = sna_output; + + if (is_panel(koutput->connector_type)) + sna_output_backlight_init(output); + + output->possible_crtcs = kencoder->possible_crtcs; + output->possible_clones = kencoder->possible_clones; + output->interlaceAllowed = TRUE; + + sna_output->output = output; + list_add(&sna_output->link, &mode->outputs); +} + +struct sna_visit_set_pixmap_window { + PixmapPtr old, new; +}; + +static int +sna_visit_set_window_pixmap(WindowPtr window, pointer data) +{ + struct sna_visit_set_pixmap_window *visit = data; + ScreenPtr screen = window->drawable.pScreen; + + if (screen->GetWindowPixmap(window) == visit->old) { + screen->SetWindowPixmap(window, visit->new); + return WT_WALKCHILDREN; + } + + return WT_DONTWALKCHILDREN; +} + +static void +sn_redirect_screen_pixmap(ScrnInfoPtr scrn, PixmapPtr old, PixmapPtr new) +{ + ScreenPtr screen = scrn->pScreen; + struct sna_visit_set_pixmap_window visit; + + visit.old = old; + visit.new = new; + TraverseTree(screen->root, sna_visit_set_window_pixmap, &visit); + + 
screen->SetScreenPixmap(new); +} + +static Bool +sna_xf86crtc_resize(ScrnInfoPtr scrn, int width, int height) +{ + struct sna *sna = to_sna(scrn); + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + struct sna_mode *mode = &sna->mode; + PixmapPtr old_front; + uint32_t old_fb_id; + struct kgem_bo *bo; + int i; + + DBG(("%s (%d, %d) -> (%d, %d)\n", + __FUNCTION__, + scrn->virtualX, scrn->virtualY, + width, height)); + + if (scrn->virtualX == width && scrn->virtualY == height) + return TRUE; + + kgem_submit(&sna->kgem); + + old_fb_id = mode->fb_id; + old_front = sna->front; + + sna->front = scrn->pScreen->CreatePixmap(scrn->pScreen, + width, height, + scrn->depth, + SNA_CREATE_FB); + if (!sna->front) + goto fail; + + bo = sna_pixmap_pin(sna->front); + if (!bo) + goto fail; + + if (drmModeAddFB(sna->kgem.fd, width, height, + scrn->depth, scrn->bitsPerPixel, + bo->pitch, bo->handle, + &mode->fb_id)) { + ErrorF("%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d\n", + __FUNCTION__, + width, height, + scrn->depth, scrn->bitsPerPixel, bo->pitch); + goto fail; + } + + for (i = 0; i < xf86_config->num_crtc; i++) { + xf86CrtcPtr crtc = xf86_config->crtc[i]; + + if (!crtc->enabled) + continue; + + if (!sna_crtc_apply(crtc)) + goto fail; + } + + scrn->virtualX = width; + scrn->virtualY = height; + scrn->displayWidth = bo->pitch / sna->mode.cpp; + + sn_redirect_screen_pixmap(scrn, old_front, sna->front); + + if (old_fb_id) + drmModeRmFB(sna->kgem.fd, old_fb_id); + scrn->pScreen->DestroyPixmap(old_front); + + return TRUE; + +fail: + if (old_fb_id != mode->fb_id) + drmModeRmFB(sna->kgem.fd, mode->fb_id); + mode->fb_id = old_fb_id; + + if (sna->front) + scrn->pScreen->DestroyPixmap(sna->front); + sna->front = old_front; + return FALSE; +} + +static Bool do_page_flip(struct sna *sna, + int ref_crtc_hw_id) +{ + xf86CrtcConfigPtr config = XF86_CRTC_CONFIG_PTR(sna->scrn); + int i; + + /* + * Queue flips on all enabled CRTCs + * Note that if/when we get per-CRTC buffers, 
we'll have to update this. + * Right now it assumes a single shared fb across all CRTCs, with the + * kernel fixing up the offset of each CRTC as necessary. + * + * Also, flips queued on disabled or incorrectly configured displays + * may never complete; this is a configuration error. + */ + for (i = 0; i < config->num_crtc; i++) { + struct sna_crtc *crtc = config->crtc[i]->driver_private; + uintptr_t data; + + if (!config->crtc[i]->enabled) + continue; + + /* Only the reference crtc will finally deliver its page flip + * completion event. All other crtc's events will be discarded. + */ + + data = (uintptr_t)sna; + data |= sna_crtc_to_pipe(crtc->crtc) == ref_crtc_hw_id; + + if (drmModePageFlip(sna->kgem.fd, + crtc_id(crtc), + sna->mode.fb_id, + DRM_MODE_PAGE_FLIP_EVENT, + (void*)data)) { + xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING, + "flip queue failed: %s\n", strerror(errno)); + return FALSE; + } + } + + return TRUE; +} + +Bool +sna_do_pageflip(struct sna *sna, + PixmapPtr pixmap, + DRI2FrameEventPtr flip_info, int ref_crtc_hw_id) +{ + ScrnInfoPtr scrn = sna->scrn; + struct sna_mode *mode = &sna->mode; + struct kgem_bo *bo = sna_pixmap_pin(pixmap); + int old_fb_id; + + if (!bo) + return FALSE; + + /* + * Create a new handle for the back buffer + */ + old_fb_id = mode->fb_id; + if (drmModeAddFB(sna->kgem.fd, scrn->virtualX, scrn->virtualY, + scrn->depth, scrn->bitsPerPixel, + bo->pitch, bo->handle, + &mode->fb_id)) { + ErrorF("%s: failed to add fb: %dx%d depth=%d, bpp=%d, pitch=%d\n", + __FUNCTION__, + scrn->virtualX, scrn->virtualY, + scrn->depth, scrn->bitsPerPixel, bo->pitch); + return FALSE; + } + + kgem_submit(&sna->kgem); + + /* + * Queue flips on all enabled CRTCs + * Note that if/when we get per-CRTC buffers, we'll have to update this. + * Right now it assumes a single shared fb across all CRTCs, with the + * kernel fixing up the offset of each CRTC as necessary. 
+ * + * Also, flips queued on disabled or incorrectly configured displays + * may never complete; this is a configuration error. + */ + mode->fe_frame = 0; + mode->fe_tv_sec = 0; + mode->fe_tv_usec = 0; + + mode->flip_info = flip_info; + mode->flip_count++; + + if (do_page_flip(sna, ref_crtc_hw_id)) { + PixmapPtr old_front = sna->front; + + sna->front = pixmap; + pixmap->refcnt++; + sn_redirect_screen_pixmap(scrn, old_front, sna->front); + scrn->displayWidth = bo->pitch / sna->mode.cpp; + + drmModeRmFB(sna->kgem.fd, old_fb_id); + scrn->pScreen->DestroyPixmap(old_front); + return TRUE; + } else { + drmModeRmFB(sna->kgem.fd, mode->fb_id); + mode->fb_id = old_fb_id; + return FALSE; + } +} + +static const xf86CrtcConfigFuncsRec sna_xf86crtc_config_funcs = { + sna_xf86crtc_resize +}; + +static void +sna_vblank_handler(int fd, unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, void *event_data) +{ + sna_dri2_frame_event(frame, tv_sec, tv_usec, event_data); +} + +static void +sna_page_flip_handler(int fd, unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, void *event_data) +{ + struct sna *sna = (struct sna *)((uintptr_t)event_data & ~1); + struct sna_mode *mode = &sna->mode; + + /* Is this the event whose info shall be delivered to higher level? */ + if ((uintptr_t)event_data & 1) { + /* Yes: Cache msc, ust for later delivery. */ + mode->fe_frame = frame; + mode->fe_tv_sec = tv_sec; + mode->fe_tv_usec = tv_usec; + } + + /* Last crtc completed flip? 
*/ + if (--mode->flip_count > 0) + return; + + if (mode->flip_info == NULL) + return; + + /* Deliver cached msc, ust from reference crtc to flip event handler */ + sna_dri2_flip_event(mode->fe_frame, mode->fe_tv_sec, + mode->fe_tv_usec, mode->flip_info); +} + +static void +drm_wakeup_handler(pointer data, int err, pointer p) +{ + struct sna *sna; + fd_set *read_mask; + + if (data == NULL || err < 0) + return; + + sna = data; + read_mask = p; + if (FD_ISSET(sna->kgem.fd, read_mask)) + drmHandleEvent(sna->kgem.fd, &sna->mode.event_context); +} + +Bool sna_mode_pre_init(ScrnInfoPtr scrn, struct sna *sna) +{ + struct sna_mode *mode = &sna->mode; + unsigned int i; + + list_init(&mode->crtcs); + list_init(&mode->outputs); + + xf86CrtcConfigInit(scrn, &sna_xf86crtc_config_funcs); + + mode->mode_res = drmModeGetResources(sna->kgem.fd); + if (!mode->mode_res) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "failed to get resources: %s\n", strerror(errno)); + return FALSE; + } + + xf86CrtcSetSizeRange(scrn, + 320, 200, + mode->mode_res->max_width, + mode->mode_res->max_height); + for (i = 0; i < mode->mode_res->count_crtcs; i++) + sna_crtc_init(scrn, mode, i); + + for (i = 0; i < mode->mode_res->count_connectors; i++) + sna_output_init(scrn, mode, i); + + xf86InitialConfiguration(scrn, TRUE); + + mode->event_context.version = DRM_EVENT_CONTEXT_VERSION; + mode->event_context.vblank_handler = sna_vblank_handler; + mode->event_context.page_flip_handler = sna_page_flip_handler; + + return TRUE; +} + +void +sna_mode_init(struct sna *sna) +{ + struct sna_mode *mode = &sna->mode; + + /* We need to re-register the mode->fd for the synchronisation + * feedback on every server generation, so perform the + * registration within ScreenInit and not PreInit. 
+ */ + mode->flip_count = 0; + AddGeneralSocket(sna->kgem.fd); + RegisterBlockAndWakeupHandlers((BlockHandlerProcPtr)NoopDDA, + drm_wakeup_handler, sna); +} + +void +sna_mode_remove_fb(struct sna *sna) +{ + struct sna_mode *mode = &sna->mode; + + if (mode->fb_id) { + drmModeRmFB(sna->kgem.fd, mode->fb_id); + mode->fb_id = 0; + } +} + +void +sna_mode_fini(struct sna *sna) +{ + struct sna_mode *mode = &sna->mode; + +#if 0 + while (!list_is_empty(&mode->crtcs)) { + xf86CrtcDestroy(list_first_entry(&mode->crtcs, + struct sna_crtc, + link)->crtc); + } + + while (!list_is_empty(&mode->outputs)) { + xf86OutputDestroy(list_first_entry(&mode->outputs, + struct sna_output, + link)->output); + } +#endif + + if (mode->fb_id) { + drmModeRmFB(sna->kgem.fd, mode->fb_id); + mode->fb_id = 0; + } + + /* mode->shadow_fb_id should have been destroyed already */ +} diff --git a/src/sna/sna_dri.c b/src/sna/sna_dri.c new file mode 100644 index 00000000..ea84fb21 --- /dev/null +++ b/src/sna/sna_dri.c @@ -0,0 +1,1446 @@ +/************************************************************************** + +Copyright 2001 VA Linux Systems Inc., Fremont, California. +Copyright © 2002 by David Dawes + +All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. 
+ +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +ATI, VA LINUX SYSTEMS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. + +**************************************************************************/ + +/* + * Authors: Jeff Hartmann <jhartmann@valinux.com> + * David Dawes <dawes@xfree86.org> + * Keith Whitwell <keith@tungstengraphics.com> + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <stdio.h> +#include <string.h> +#include <assert.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/ioctl.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/time.h> +#include <sys/mman.h> +#include <time.h> +#include <errno.h> + +#include "xf86.h" +#include "xf86_OSproc.h" +#include "xf86Priv.h" + +#include "xf86PciInfo.h" +#include "xf86Pci.h" + +#include "windowstr.h" +#include "gcstruct.h" + +#include "sna.h" +#include "sna_reg.h" + +#include "i915_drm.h" + +#include "dri2.h" + +#if DEBUG_DRI +#undef DBG +#define DBG(x) ErrorF x +#endif + +struct sna_dri2_private { + int refcnt; + PixmapPtr pixmap; + struct kgem_bo *bo; + unsigned int attachment; +}; + +static struct kgem_bo *sna_pixmap_set_dri(struct sna *sna, + PixmapPtr pixmap) +{ + struct sna_pixmap *priv; + + priv = sna_pixmap_force_to_gpu(pixmap); + if (priv == NULL) + return NULL; + + if (priv->flush) + return priv->gpu_bo; + + if (priv->cpu_damage) + list_add(&priv->list, &sna->dirty_pixmaps); + + priv->flush = 1; + priv->gpu_bo->flush = 1; + if (priv->gpu_bo->exec) + sna->kgem.flush = 1; + + priv->pinned = 1; + return priv->gpu_bo; +} + +#if DRI2INFOREC_VERSION < 2 +static DRI2BufferPtr +sna_dri2_create_buffers(DrawablePtr 
drawable, unsigned int *attachments, + int count) +{ + ScreenPtr screen = drawable->pScreen; + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; + struct sna *sna = to_sna(scrn); + DRI2BufferPtr buffers; + struct sna_dri2_private *privates; + int depth = -1; + int i; + + buffers = calloc(count, sizeof *buffers); + if (buffers == NULL) + return NULL; + privates = calloc(count, sizeof *privates); + if (privates == NULL) { + free(buffers); + return NULL; + } + + for (i = 0; i < count; i++) { + PixmapPtr pixmap = NULL; + if (attachments[i] == DRI2BufferFrontLeft) { + pixmap = get_drawable_pixmap(drawable); + pixmap->refcnt++; + bo = sna_pixmap_set_dri(sna, pixmap); + } else if (attachments[i] == DRI2BufferBackLeft) { + pixmap = screen->CreatePixmap(screen, + drawable->width, drawable->height, drawable->depth, + 0); + if (!pixmap) + goto unwind; + + bo = sna_pixmap_set_dri(sna, pixmap); + } else if (attachments[i] == DRI2BufferStencil && depth != -1) { + buffers[i] = buffers[depth]; + buffers[i].attachment = attachments[i]; + privates[depth].refcnt++; + continue; + } else { + unsigned int tiling = I915_TILING_X; + if (SUPPORTS_YTILING(intel)) { + switch (attachment) { + case DRI2BufferDepth: + case DRI2BufferDepthStencil: + tiling = I915_TILING_Y; + break; + } + } + + bo = kgem_create_2d(&intel->kgem, + drawable->width, + drawable->height, + 32, tiling); + if (!bo) + goto unwind; + } + + if (attachments[i] == DRI2BufferDepth) + depth = i; + + buffers[i].attachment = attachments[i]; + buffers[i].pitch = pitch; + buffers[i].cpp = bpp / 8; + buffers[i].driverPrivate = &privates[i]; + buffers[i].flags = 0; /* not tiled */ + buffers[i].name = kgem_bo_flink(&intel->kgem, bo); + privates[i].refcnt = 1; + privates[i].pixmap = pixmap; + privates[i].bo = bo; + privates[i].attachment = attachments[i]; + + if (buffers[i].name == 0) + goto unwind; + } + + return buffers; + +unwind: + do { + if (--privates[i].refcnt == 0) { + if (privates[i].pixmap) + 
screen->DestroyPixmap(privates[i].pixmap); + else + gem_close(privates[i].handle); + } + } while (i--); + free(privates); + free(buffers); + return NULL; +} + +static void +sna_dri2_destroy_buffers(DrawablePtr drawable, DRI2BufferPtr buffers, int count) +{ + ScreenPtr screen = drawable->pScreen; + sna_dri2_private *private; + int i; + + for (i = 0; i < count; i++) { + private = buffers[i].driverPrivate; + if (private->pixmap) + screen->DestroyPixmap(private->pixmap); + else + kgem_delete(&intel->kgem, private->bo); + } + + if (buffers) { + free(buffers[0].driverPrivate); + free(buffers); + } +} + +#else + +static DRI2Buffer2Ptr +sna_dri2_create_buffer(DrawablePtr drawable, unsigned int attachment, + unsigned int format) +{ + ScreenPtr screen = drawable->pScreen; + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; + struct sna *sna = to_sna(scrn); + DRI2Buffer2Ptr buffer; + struct sna_dri2_private *private; + PixmapPtr pixmap; + struct kgem_bo *bo; + int bpp, usage; + + DBG(("%s(attachment=%d, format=%d)\n", + __FUNCTION__, attachment, format)); + + buffer = calloc(1, sizeof *buffer + sizeof *private); + if (buffer == NULL) + return NULL; + private = (struct sna_dri2_private *)(buffer + 1); + + pixmap = NULL; + usage = CREATE_PIXMAP_USAGE_SCRATCH; + switch (attachment) { + case DRI2BufferFrontLeft: + pixmap = get_drawable_pixmap(drawable); + pixmap->refcnt++; + bo = sna_pixmap_set_dri(sna, pixmap); + bpp = pixmap->drawable.bitsPerPixel; + break; + + case DRI2BufferFakeFrontLeft: + case DRI2BufferFakeFrontRight: + usage = 0; + case DRI2BufferFrontRight: + case DRI2BufferBackLeft: + case DRI2BufferBackRight: + pixmap = screen->CreatePixmap(screen, + drawable->width, + drawable->height, + drawable->depth, + usage); + if (!pixmap) + goto err; + + bo = sna_pixmap_set_dri(sna, pixmap); + bpp = pixmap->drawable.bitsPerPixel; + break; + + default: + bpp = format ? 
format : drawable->bitsPerPixel, + bo = kgem_create_2d(&sna->kgem, + drawable->width, drawable->height, bpp, + //sna->kgem.gen >= 40 ? I915_TILING_Y : I915_TILING_X, + I915_TILING_Y, + CREATE_EXACT); + break; + } + if (bo == NULL) + goto err; + + buffer->attachment = attachment; + buffer->pitch = bo->pitch; + buffer->cpp = bpp / 8; + buffer->driverPrivate = private; + buffer->format = format; + buffer->flags = 0; + buffer->name = kgem_bo_flink(&sna->kgem, bo); + private->refcnt = 1; + private->pixmap = pixmap; + private->bo = bo; + private->attachment = attachment; + + if (buffer->name == 0) { + /* failed to name buffer */ + if (pixmap) + screen->DestroyPixmap(pixmap); + else + kgem_bo_destroy(&sna->kgem, bo); + goto err; + } + + return buffer; + +err: + free(buffer); + return NULL; +} + +static void sna_dri2_destroy_buffer(DrawablePtr drawable, DRI2Buffer2Ptr buffer) +{ + if (buffer && buffer->driverPrivate) { + struct sna_dri2_private *private = buffer->driverPrivate; + if (--private->refcnt == 0) { + if (private->pixmap) { + ScreenPtr screen = private->pixmap->drawable.pScreen; + screen->DestroyPixmap(private->pixmap); + } else { + struct sna *sna = to_sna_from_drawable(drawable); + kgem_bo_destroy(&sna->kgem, private->bo); + } + + free(buffer); + } + } else + free(buffer); +} + +#endif + +static void sna_dri2_reference_buffer(DRI2Buffer2Ptr buffer) +{ + if (buffer) { + struct sna_dri2_private *private = buffer->driverPrivate; + private->refcnt++; + } +} + +static void damage(DrawablePtr drawable, RegionPtr region) +{ + PixmapPtr pixmap; + struct sna_pixmap *priv; + int16_t dx, dy; + + pixmap = get_drawable_pixmap(drawable); + get_drawable_deltas(drawable, pixmap, &dx, &dy); + + priv = sna_pixmap(pixmap); + if (priv->gpu_only) + return; + + if (region) { + BoxPtr box; + + RegionTranslate(region, + drawable->x + dx, + drawable->y + dy); + box = RegionExtents(region); + if (RegionNumRects(region) == 1 && + box->x1 <= 0 && box->y1 <= 0 && + box->x2 >= 
pixmap->drawable.width && + box->y2 >= pixmap->drawable.height) { + sna_damage_all(&priv->gpu_damage, + pixmap->drawable.width, + pixmap->drawable.height); + sna_damage_destroy(&priv->cpu_damage); + } else { + sna_damage_add(&priv->gpu_damage, region); + sna_damage_subtract(&priv->cpu_damage, region); + } + + RegionTranslate(region, + -(drawable->x + dx), + -(drawable->y + dy)); + } else { + BoxRec box; + + box.x1 = drawable->x + dx; + box.x2 = box.x1 + drawable->width; + + box.y1 = drawable->y + dy; + box.y2 = box.y1 + drawable->height; + if (box.x1 == 0 && box.y1 == 0 && + box.x2 == pixmap->drawable.width && + box.y2 == pixmap->drawable.height) { + sna_damage_all(&priv->gpu_damage, + pixmap->drawable.width, + pixmap->drawable.height); + sna_damage_destroy(&priv->cpu_damage); + } else { + sna_damage_add_box(&priv->gpu_damage, &box); + sna_damage_subtract_box(&priv->gpu_damage, &box); + } + } +} + +static void +sna_dri2_copy_region(DrawablePtr drawable, RegionPtr region, + DRI2BufferPtr destBuffer, DRI2BufferPtr sourceBuffer) +{ + struct sna *sna = to_sna_from_drawable(drawable); + struct sna_dri2_private *srcPrivate = sourceBuffer->driverPrivate; + struct sna_dri2_private *dstPrivate = destBuffer->driverPrivate; + ScreenPtr screen = drawable->pScreen; + DrawablePtr src = (srcPrivate->attachment == DRI2BufferFrontLeft) + ? drawable : &srcPrivate->pixmap->drawable; + DrawablePtr dst = (dstPrivate->attachment == DRI2BufferFrontLeft) + ? drawable : &dstPrivate->pixmap->drawable; + GCPtr gc; + bool flush = false; + + DBG(("%s(region=(%d, %d), (%d, %d)))\n", __FUNCTION__, + region ? REGION_EXTENTS(NULL, region)->x1 : 0, + region ? REGION_EXTENTS(NULL, region)->y1 : 0, + region ? REGION_EXTENTS(NULL, region)->x2 : dst->width, + region ? 
REGION_EXTENTS(NULL, region)->y2 : dst->height)); + + gc = GetScratchGC(dst->depth, screen); + if (!gc) + return; + + if (region) { + RegionPtr clip; + + clip = REGION_CREATE(screen, NULL, 0); + pixman_region_intersect_rect(clip, region, + 0, 0, dst->width, dst->height); + (*gc->funcs->ChangeClip)(gc, CT_REGION, clip, 0); + region = clip; + } + ValidateGC(dst, gc); + + /* Invalidate src to reflect unknown modifications made by the client */ + damage(src, region); + + /* Wait for the scanline to be outside the region to be copied */ + if (sna->flags & SNA_SWAP_WAIT) + flush = sna_wait_for_scanline(sna, get_drawable_pixmap(dst), + NULL, region); + + /* It's important that this copy gets submitted before the + * direct rendering client submits rendering for the next + * frame, but we don't actually need to submit right now. The + * client will wait for the DRI2CopyRegion reply or the swap + * buffer event before rendering, and we'll hit the flush + * callback chain before those messages are sent. We submit + * our batch buffers from the flush callback chain so we know + * that will happen before the client tries to render + * again. + */ + gc->ops->CopyArea(src, dst, gc, + 0, 0, + drawable->width, drawable->height, + 0, 0); + FreeScratchGC(gc); + + DBG(("%s: flushing? %d\n", __FUNCTION__, flush)); + if (flush) /* STAT! 
*/ + kgem_submit(&sna->kgem); +} + +#if DRI2INFOREC_VERSION >= 4 + + +static int +sna_dri2_get_pipe(DrawablePtr pDraw) +{ + ScreenPtr pScreen = pDraw->pScreen; + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + BoxRec box, crtcbox; + xf86CrtcPtr crtc; + int pipe = -1; + + box.x1 = pDraw->x; + box.y1 = pDraw->y; + box.x2 = box.x1 + pDraw->width; + box.y2 = box.y1 + pDraw->height; + + crtc = sna_covering_crtc(pScrn, &box, NULL, &crtcbox); + + /* Make sure the CRTC is valid and this is the real front buffer */ + if (crtc != NULL && !crtc->rotatedData) + pipe = sna_crtc_to_pipe(crtc); + + DBG(("%s(box=((%d, %d), (%d, %d)), crtcbox=((%d, %d), (%d, %d)), pipe=%d)\n", + __FUNCTION__, + box.x1, box.y1, box.x2, box.y2, + crtcbox.x1, crtcbox.y1, crtcbox.x2, crtcbox.y2, + pipe)); + + return pipe; +} + +static RESTYPE frame_event_client_type, frame_event_drawable_type; + +static int +sna_dri2_frame_event_client_gone(void *data, XID id) +{ + DRI2FrameEventPtr frame_event = data; + + frame_event->client = NULL; + frame_event->client_id = None; + return Success; +} + +static int +sna_dri2_frame_event_drawable_gone(void *data, XID id) +{ + DRI2FrameEventPtr frame_event = data; + + frame_event->drawable_id = None; + return Success; +} + +static Bool +sna_dri2_register_frame_event_resource_types(void) +{ + frame_event_client_type = + CreateNewResourceType(sna_dri2_frame_event_client_gone, + "Frame Event Client"); + if (!frame_event_client_type) + return FALSE; + + frame_event_drawable_type = + CreateNewResourceType(sna_dri2_frame_event_drawable_gone, + "Frame Event Drawable"); + if (!frame_event_drawable_type) + return FALSE; + + return TRUE; +} + +/* + * Hook this frame event into the server resource + * database so we can clean it up if the drawable or + * client exits while the swap is pending + */ +static Bool +sna_dri2_add_frame_event(DRI2FrameEventPtr frame_event) +{ + frame_event->client_id = FakeClientID(frame_event->client->index); + + if 
(!AddResource(frame_event->client_id, + frame_event_client_type, + frame_event)) + return FALSE; + + if (!AddResource(frame_event->drawable_id, + frame_event_drawable_type, + frame_event)) { + FreeResourceByType(frame_event->client_id, + frame_event_client_type, + TRUE); + return FALSE; + } + + return TRUE; +} + +static void +sna_dri2_del_frame_event(DRI2FrameEventPtr frame_event) +{ + if (frame_event->client_id) + FreeResourceByType(frame_event->client_id, + frame_event_client_type, + TRUE); + + if (frame_event->drawable_id) + FreeResourceByType(frame_event->drawable_id, + frame_event_drawable_type, + TRUE); +} + +static void +sna_dri2_exchange_buffers(DrawablePtr draw, + DRI2BufferPtr front, DRI2BufferPtr back) +{ + struct sna_dri2_private *front_priv, *back_priv; + struct sna_pixmap *front_sna, *back_sna; + struct kgem_bo *bo; + int tmp; + + DBG(("%s()\n", __FUNCTION__)); + + front_priv = front->driverPrivate; + back_priv = back->driverPrivate; + + front_sna = sna_pixmap(front_priv->pixmap); + back_sna = sna_pixmap(back_priv->pixmap); + + /* Force a copy/readback for the next CPU access */ + if (!front_sna->gpu_only) { + sna_damage_all(&front_sna->gpu_damage, + front_priv->pixmap->drawable.width, + front_priv->pixmap->drawable.height); + sna_damage_destroy(&front_sna->cpu_damage); + } + if (front_sna->mapped) { + munmap(front_priv->pixmap->devPrivate.ptr, + front_sna->gpu_bo->size); + front_sna->mapped = false; + } + if (!back_sna->gpu_only) { + sna_damage_all(&back_sna->gpu_damage, + back_priv->pixmap->drawable.width, + back_priv->pixmap->drawable.height); + sna_damage_destroy(&back_sna->cpu_damage); + } + if (back_sna->mapped) { + munmap(back_priv->pixmap->devPrivate.ptr, + back_sna->gpu_bo->size); + back_sna->mapped = false; + } + + /* Swap BO names so DRI works */ + tmp = front->name; + front->name = back->name; + back->name = tmp; + + /* and swap bo so future flips work */ + bo = front_priv->bo; + front_priv->bo = back_priv->bo; + back_priv->bo = bo; + + bo 
= front_sna->gpu_bo; + front_sna->gpu_bo = back_sna->gpu_bo; + back_sna->gpu_bo = bo; +} + +/* + * Our internal swap routine takes care of actually exchanging, blitting, or + * flipping buffers as necessary. + */ +static Bool +sna_dri2_schedule_flip(struct sna *sna, + ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + DRI2BufferPtr back, DRI2SwapEventPtr func, void *data, + unsigned int target_msc) +{ + struct sna_dri2_private *back_priv; + DRI2FrameEventPtr flip_info; + + /* Main crtc for this drawable shall finally deliver pageflip event. */ + int ref_crtc_hw_id = sna_dri2_get_pipe(draw); + + DBG(("%s()\n", __FUNCTION__)); + + flip_info = calloc(1, sizeof(DRI2FrameEventRec)); + if (!flip_info) + return FALSE; + + flip_info->drawable_id = draw->id; + flip_info->client = client; + flip_info->type = DRI2_SWAP; + flip_info->event_complete = func; + flip_info->event_data = data; + flip_info->frame = target_msc; + + if (!sna_dri2_add_frame_event(flip_info)) { + free(flip_info); + return FALSE; + } + + /* Page flip the full screen buffer */ + back_priv = back->driverPrivate; + if (sna_do_pageflip(sna, + back_priv->pixmap, + flip_info, ref_crtc_hw_id)) + return TRUE; + + sna_dri2_del_frame_event(flip_info); + free(flip_info); + return FALSE; +} + +static Bool +can_exchange(DRI2BufferPtr front, DRI2BufferPtr back) +{ + struct sna_dri2_private *front_priv = front->driverPrivate; + struct sna_dri2_private *back_priv = back->driverPrivate; + PixmapPtr front_pixmap = front_priv->pixmap; + PixmapPtr back_pixmap = back_priv->pixmap; + struct sna_pixmap *front_sna = sna_pixmap(front_pixmap); + struct sna_pixmap *back_sna = sna_pixmap(back_pixmap); + + if (front_pixmap->drawable.width != back_pixmap->drawable.width) { + DBG(("%s -- no, size mismatch: front width=%d, back=%d\n", + __FUNCTION__, + front_pixmap->drawable.width, + back_pixmap->drawable.width)); + return FALSE; + } + + if (front_pixmap->drawable.height != back_pixmap->drawable.height) { + DBG(("%s -- no, size 
mismatch: front height=%d, back=%d\n", + __FUNCTION__, + front_pixmap->drawable.height, + back_pixmap->drawable.height)); + return FALSE; + } + + if (front_pixmap->drawable.bitsPerPixel != back_pixmap->drawable.bitsPerPixel) { + DBG(("%s -- no, depth mismatch: front bpp=%d, back=%d\n", + __FUNCTION__, + front_pixmap->drawable.bitsPerPixel, + back_pixmap->drawable.bitsPerPixel)); + return FALSE; + } + + /* prevent an implicit tiling mode change */ + if (front_sna->gpu_bo->tiling != back_sna->gpu_bo->tiling) { + DBG(("%s -- no, tiling mismatch: front %d, back=%d\n", + __FUNCTION__, + front_sna->gpu_bo->tiling, + back_sna->gpu_bo->tiling)); + return FALSE; + } + + if (front_sna->gpu_only != back_sna->gpu_only) { + DBG(("%s -- no, mismatch in gpu_only: front %d, back=%d\n", + __FUNCTION__, front_sna->gpu_only, back_sna->gpu_only)); + return FALSE; + } + + return TRUE; +} + +void sna_dri2_frame_event(unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, DRI2FrameEventPtr swap_info) +{ + DrawablePtr drawable; + ScreenPtr screen; + ScrnInfoPtr scrn; + struct sna *sna; + int status; + + DBG(("%s(id=%d, type=%d)\n", __FUNCTION__, + (int)swap_info->drawable_id, swap_info->type)); + + status = BadDrawable; + if (swap_info->drawable_id) + status = dixLookupDrawable(&drawable, + swap_info->drawable_id, + serverClient, + M_ANY, DixWriteAccess); + if (status != Success) + goto done; + + screen = drawable->pScreen; + scrn = xf86Screens[screen->myNum]; + sna = to_sna(scrn); + + switch (swap_info->type) { + case DRI2_FLIP: + /* If we can still flip... 
*/ + if (DRI2CanFlip(drawable) && + !sna->shadow && + can_exchange(swap_info->front, swap_info->back) && + sna_dri2_schedule_flip(sna, + swap_info->client, + drawable, + swap_info->front, + swap_info->back, + swap_info->event_complete, + swap_info->event_data, + swap_info->frame)) { + sna_dri2_exchange_buffers(drawable, + swap_info->front, + swap_info->back); + break; + } + /* else fall through to exchange/blit */ + case DRI2_SWAP: { + int swap_type; + + if (DRI2CanExchange(drawable) && + can_exchange(swap_info->front, swap_info->back)) { + sna_dri2_exchange_buffers(drawable, + swap_info->front, + swap_info->back); + swap_type = DRI2_EXCHANGE_COMPLETE; + } else { + sna_dri2_copy_region(drawable, NULL, + swap_info->front, + swap_info->back); + swap_type = DRI2_BLIT_COMPLETE; + } + DRI2SwapComplete(swap_info->client, + drawable, frame, + tv_sec, tv_usec, + swap_type, + swap_info->client ? swap_info->event_complete : NULL, + swap_info->event_data); + break; + } + case DRI2_WAITMSC: + if (swap_info->client) + DRI2WaitMSCComplete(swap_info->client, drawable, + frame, tv_sec, tv_usec); + break; + default: + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s: unknown vblank event received\n", __func__); + /* Unknown type */ + break; + } + +done: + sna_dri2_del_frame_event(swap_info); + sna_dri2_destroy_buffer(drawable, swap_info->front); + sna_dri2_destroy_buffer(drawable, swap_info->back); + free(swap_info); +} + +void sna_dri2_flip_event(unsigned int frame, unsigned int tv_sec, + unsigned int tv_usec, DRI2FrameEventPtr flip) +{ + DrawablePtr drawable; + ScreenPtr screen; + ScrnInfoPtr scrn; + int status; + + DBG(("%s(frame=%d, tv=%d.%06d, type=%d)\n", + __FUNCTION__, frame, tv_sec, tv_usec, flip->type)); + + if (!flip->drawable_id) + status = BadDrawable; + else + status = dixLookupDrawable(&drawable, + flip->drawable_id, + serverClient, + M_ANY, DixWriteAccess); + if (status != Success) { + sna_dri2_del_frame_event(flip); + free(flip); + return; + } + + screen = 
drawable->pScreen; + scrn = xf86Screens[screen->myNum]; + + /* We assume our flips arrive in order, so we don't check the frame */ + switch (flip->type) { + case DRI2_SWAP: + /* Check for too small vblank count of pageflip completion, taking wraparound + * into account. This usually means some defective kms pageflip completion, + * causing wrong (msc, ust) return values and possible visual corruption. + */ + if ((frame < flip->frame) && (flip->frame - frame < 5)) { + static int limit = 5; + + /* XXX we are currently hitting this path with older + * kernels, so make it quieter. + */ + if (limit) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s: Pageflip completion has impossible msc %d < target_msc %d\n", + __func__, frame, flip->frame); + limit--; + } + + /* All-0 values signal timestamping failure. */ + frame = tv_sec = tv_usec = 0; + } + + DBG(("%s: swap complete\n", __FUNCTION__)); + DRI2SwapComplete(flip->client, drawable, frame, tv_sec, tv_usec, + DRI2_FLIP_COMPLETE, flip->client ? flip->event_complete : NULL, + flip->event_data); + break; + case DRI2_ASYNC_SWAP: + DBG(("%s: asunc swap flip completed\n", __FUNCTION__)); + to_sna(scrn)->mode.flip_pending[flip->pipe]--; + break; + default: + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s: unknown vblank event received\n", __func__); + /* Unknown type */ + break; + } + + sna_dri2_del_frame_event(flip); + free(flip); +} + +/* + * ScheduleSwap is responsible for requesting a DRM vblank event for the + * appropriate frame. + * + * In the case of a blit (e.g. for a windowed swap) or buffer exchange, + * the vblank requested can simply be the last queued swap frame + the swap + * interval for the drawable. + * + * In the case of a page flip, we request an event for the last queued swap + * frame + swap interval - 1, since we'll need to queue the flip for the frame + * immediately following the received event. 
+ * + * The client will be blocked if it tries to perform further GL commands + * after queueing a swap, though in the Intel case after queueing a flip, the + * client is free to queue more commands; they'll block in the kernel if + * they access buffers busy with the flip. + * + * When the swap is complete, the driver should call into the server so it + * can send any swap complete events that have been requested. + */ +static int +sna_dri2_schedule_swap(ClientPtr client, DrawablePtr draw, DRI2BufferPtr front, + DRI2BufferPtr back, CARD64 *target_msc, CARD64 divisor, + CARD64 remainder, DRI2SwapEventPtr func, void *data) +{ + ScreenPtr screen = draw->pScreen; + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; + struct sna *sna = to_sna(scrn); + drmVBlank vbl; + int ret, pipe = sna_dri2_get_pipe(draw), flip = 0; + DRI2FrameEventPtr swap_info = NULL; + enum DRI2FrameEventType swap_type = DRI2_SWAP; + CARD64 current_msc; + + DBG(("%s()\n", __FUNCTION__)); + + /* Drawable not displayed... just complete the swap */ + if (pipe == -1) + goto blit_fallback; + + /* Truncate to match kernel interfaces; means occasional overflow + * misses, but that's generally not a big deal */ + *target_msc &= 0xffffffff; + divisor &= 0xffffffff; + remainder &= 0xffffffff; + + swap_info = calloc(1, sizeof(DRI2FrameEventRec)); + if (!swap_info) + goto blit_fallback; + + swap_info->drawable_id = draw->id; + swap_info->client = client; + swap_info->event_complete = func; + swap_info->event_data = data; + swap_info->front = front; + swap_info->back = back; + + if (!sna_dri2_add_frame_event(swap_info)) { + free(swap_info); + swap_info = NULL; + goto blit_fallback; + } + + sna_dri2_reference_buffer(front); + sna_dri2_reference_buffer(back); + + /* Get current count */ + vbl.request.type = DRM_VBLANK_RELATIVE; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + vbl.request.sequence = 0; + ret = drmWaitVBlank(sna->kgem.fd, &vbl); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + 
"first get vblank counter failed: %s\n", + strerror(errno)); + goto blit_fallback; + } + + current_msc = vbl.reply.sequence; + + /* Flips need to be submitted one frame before */ + if (!sna->shadow && DRI2CanFlip(draw) && can_exchange(front, back)) { + DBG(("%s: can flip\n", __FUNCTION__)); + swap_type = DRI2_FLIP; + flip = 1; + } + + swap_info->type = swap_type; + + /* Correct target_msc by 'flip' if swap_type == DRI2_FLIP. + * Do it early, so handling of different timing constraints + * for divisor, remainder and msc vs. target_msc works. + */ + if (*target_msc > 0) + *target_msc -= flip; + + /* + * If divisor is zero, or current_msc is smaller than target_msc + * we just need to make sure target_msc passes before initiating + * the swap. + */ + if (divisor == 0 || current_msc < *target_msc) { + vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + + /* If non-pageflipping, but blitting/exchanging, we need to use + * DRM_VBLANK_NEXTONMISS to avoid unreliable timestamping later + * on. + */ + if (flip == 0) + vbl.request.type |= DRM_VBLANK_NEXTONMISS; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + + /* If target_msc already reached or passed, set it to + * current_msc to ensure we return a reasonable value back + * to the caller. This makes swap_interval logic more robust. + */ + if (current_msc >= *target_msc) + *target_msc = current_msc; + + vbl.request.sequence = *target_msc; + vbl.request.signal = (unsigned long)swap_info; + ret = drmWaitVBlank(sna->kgem.fd, &vbl); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "divisor 0 get vblank counter failed: %s\n", + strerror(errno)); + goto blit_fallback; + } + + *target_msc = vbl.reply.sequence + flip; + swap_info->frame = *target_msc; + return TRUE; + } + + /* + * If we get here, target_msc has already passed or we don't have one, + * and we need to queue an event that will satisfy the divisor/remainder + * equation. 
+ */ + vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; + if (flip == 0) + vbl.request.type |= DRM_VBLANK_NEXTONMISS; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + + vbl.request.sequence = current_msc - (current_msc % divisor) + + remainder; + + /* + * If the calculated deadline vbl.request.sequence is smaller than + * or equal to current_msc, it means we've passed the last point + * when effective onset frame seq could satisfy + * seq % divisor == remainder, so we need to wait for the next time + * this will happen. + + * This comparison takes the 1 frame swap delay in pageflipping mode + * into account, as well as a potential DRM_VBLANK_NEXTONMISS delay + * if we are blitting/exchanging instead of flipping. + */ + if (vbl.request.sequence <= current_msc) + vbl.request.sequence += divisor; + + /* Account for 1 frame extra pageflip delay if flip > 0 */ + vbl.request.sequence -= flip; + + vbl.request.signal = (unsigned long)swap_info; + ret = drmWaitVBlank(sna->kgem.fd, &vbl); + if (ret) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "final get vblank counter failed: %s\n", + strerror(errno)); + goto blit_fallback; + } + + /* Adjust returned value for 1 fame pageflip offset of flip > 0 */ + *target_msc = vbl.reply.sequence + flip; + swap_info->frame = *target_msc; + return TRUE; + +blit_fallback: + DBG(("%s -- blit\n", __FUNCTION__)); + sna_dri2_copy_region(draw, NULL, front, back); + + DRI2SwapComplete(client, draw, 0, 0, 0, DRI2_BLIT_COMPLETE, func, data); + if (swap_info) { + sna_dri2_del_frame_event(swap_info); + sna_dri2_destroy_buffer(draw, swap_info->front); + sna_dri2_destroy_buffer(draw, swap_info->back); + free(swap_info); + } + *target_msc = 0; /* offscreen, so zero out target vblank count */ + return TRUE; +} + +#if DRI2INFOREC_VERSION >= 6 +static void +sna_dri2_async_swap(ClientPtr client, DrawablePtr draw, + DRI2BufferPtr front, DRI2BufferPtr back, + DRI2SwapEventPtr func, void *data) +{ + ScreenPtr screen = draw->pScreen; + 
ScrnInfoPtr scrn = xf86Screens[screen->myNum]; + struct sna *sna = to_sna(scrn); + int pipe = sna_dri2_get_pipe(draw); + int type = DRI2_EXCHANGE_COMPLETE; + + DBG(("%s()\n", __FUNCTION__)); + + /* Drawable not displayed... just complete the swap */ + if (pipe == -1) + goto exchange; + + if (sna->shadow || + !DRI2CanFlip(draw) || + !can_exchange(front, back)) { + sna_dri2_copy_region(draw, NULL, front, back); + DRI2SwapComplete(client, draw, 0, 0, 0, + DRI2_BLIT_COMPLETE, func, data); + return; + } + + if (!sna->mode.flip_pending[pipe]) { + DRI2FrameEventPtr info; + struct sna_dri2_private *backPrivate = back->driverPrivate; + DrawablePtr src = &backPrivate->pixmap->drawable; + PixmapPtr copy; + GCPtr gc; + + copy = screen->CreatePixmap(screen, + src->width, src->height, src->depth, + 0); + if (!copy) + goto exchange; + + if (!sna_pixmap_force_to_gpu(copy)) { + screen->DestroyPixmap(copy); + goto exchange; + } + + /* copy back to new buffer, and schedule flip */ + gc = GetScratchGC(src->depth, screen); + if (!gc) { + screen->DestroyPixmap(copy); + goto exchange; + } + ValidateGC(src, gc); + + gc->ops->CopyArea(src, ©->drawable, gc, + 0, 0, + draw->width, draw->height, + 0, 0); + FreeScratchGC(gc); + + info = calloc(1, sizeof(DRI2FrameEventRec)); + if (!info) { + screen->DestroyPixmap(copy); + goto exchange; + } + + info->drawable_id = draw->id; + info->client = client; + info->type = DRI2_ASYNC_SWAP; + info->pipe = pipe; + + sna->mode.flip_pending[pipe]++; + sna_do_pageflip(sna, copy, info, + sna_dri2_get_pipe(draw)); + screen->DestroyPixmap(copy); + + type = DRI2_FLIP_COMPLETE; + } + +exchange: + sna_dri2_exchange_buffers(draw, front, back); + DRI2SwapComplete(client, draw, 0, 0, 0, type, func, data); +} +#endif + +/* + * Get current frame count and frame count timestamp, based on drawable's + * crtc. 
+ */ +static int +sna_dri2_get_msc(DrawablePtr draw, CARD64 *ust, CARD64 *msc) +{ + ScreenPtr screen = draw->pScreen; + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; + struct sna *sna = to_sna(scrn); + drmVBlank vbl; + int ret, pipe = sna_dri2_get_pipe(draw); + + DBG(("%s()\n", __FUNCTION__)); + + /* Drawable not displayed, make up a value */ + if (pipe == -1) { + *ust = 0; + *msc = 0; + return TRUE; + } + + vbl.request.type = DRM_VBLANK_RELATIVE; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + vbl.request.sequence = 0; + + ret = drmWaitVBlank(sna->kgem.fd, &vbl); + if (ret) { + static int limit = 5; + if (limit) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s:%d get vblank counter failed: %s\n", + __FUNCTION__, __LINE__, + strerror(errno)); + limit--; + } + return FALSE; + } + + *ust = ((CARD64)vbl.reply.tval_sec * 1000000) + vbl.reply.tval_usec; + *msc = vbl.reply.sequence; + + return TRUE; +} + +/* + * Request a DRM event when the requested conditions will be satisfied. + * + * We need to handle the event and ask the server to wake up the client when + * we receive it. 
+ */ +static int +sna_dri2_schedule_wait_msc(ClientPtr client, DrawablePtr draw, CARD64 target_msc, + CARD64 divisor, CARD64 remainder) +{ + ScreenPtr screen = draw->pScreen; + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; + struct sna *sna = to_sna(scrn); + DRI2FrameEventPtr wait_info; + drmVBlank vbl; + int ret, pipe = sna_dri2_get_pipe(draw); + CARD64 current_msc; + + DBG(("%s(target_msc=%llu, divisor=%llu, rem=%llu)\n", + __FUNCTION__, + (long long)target_msc, + (long long)divisor, + (long long)remainder)); + + /* Truncate to match kernel interfaces; means occasional overflow + * misses, but that's generally not a big deal */ + target_msc &= 0xffffffff; + divisor &= 0xffffffff; + remainder &= 0xffffffff; + + /* Drawable not visible, return immediately */ + if (pipe == -1) + goto out_complete; + + wait_info = calloc(1, sizeof(DRI2FrameEventRec)); + if (!wait_info) + goto out_complete; + + wait_info->drawable_id = draw->id; + wait_info->client = client; + wait_info->type = DRI2_WAITMSC; + + /* Get current count */ + vbl.request.type = DRM_VBLANK_RELATIVE; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + vbl.request.sequence = 0; + ret = drmWaitVBlank(sna->kgem.fd, &vbl); + if (ret) { + static int limit = 5; + if (limit) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s:%d get vblank counter failed: %s\n", + __FUNCTION__, __LINE__, + strerror(errno)); + limit--; + } + goto out_complete; + } + + current_msc = vbl.reply.sequence; + + /* + * If divisor is zero, or current_msc is smaller than target_msc, + * we just need to make sure target_msc passes before waking up the + * client. + */ + if (divisor == 0 || current_msc < target_msc) { + /* If target_msc already reached or passed, set it to + * current_msc to ensure we return a reasonable value back + * to the caller. This keeps the client from continually + * sending us MSC targets from the past by forcibly updating + * their count on this call. 
+ */ + if (current_msc >= target_msc) + target_msc = current_msc; + vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + vbl.request.sequence = target_msc; + vbl.request.signal = (unsigned long)wait_info; + ret = drmWaitVBlank(sna->kgem.fd, &vbl); + if (ret) { + static int limit = 5; + if (limit) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s:%d get vblank counter failed: %s\n", + __FUNCTION__, __LINE__, + strerror(errno)); + limit--; + } + goto out_complete; + } + + wait_info->frame = vbl.reply.sequence; + DRI2BlockClient(client, draw); + return TRUE; + } + + /* + * If we get here, target_msc has already passed or we don't have one, + * so we queue an event that will satisfy the divisor/remainder equation. + */ + vbl.request.type = DRM_VBLANK_ABSOLUTE | DRM_VBLANK_EVENT; + if (pipe > 0) + vbl.request.type |= DRM_VBLANK_SECONDARY; + + vbl.request.sequence = current_msc - (current_msc % divisor) + + remainder; + + /* + * If calculated remainder is larger than requested remainder, + * it means we've passed the last point where + * seq % divisor == remainder, so we need to wait for the next time + * that will happen. 
+ */ + if ((current_msc % divisor) >= remainder) + vbl.request.sequence += divisor; + + vbl.request.signal = (unsigned long)wait_info; + ret = drmWaitVBlank(sna->kgem.fd, &vbl); + if (ret) { + static int limit = 5; + if (limit) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "%s:%d get vblank counter failed: %s\n", + __FUNCTION__, __LINE__, + strerror(errno)); + limit--; + } + goto out_complete; + } + + wait_info->frame = vbl.reply.sequence; + DRI2BlockClient(client, draw); + + return TRUE; + +out_complete: + DRI2WaitMSCComplete(client, draw, target_msc, 0, 0); + return TRUE; +} +#endif + +static int dri2_server_generation; + +Bool sna_dri2_open(struct sna *sna, ScreenPtr screen) +{ + DRI2InfoRec info; + int dri2_major = 1; + int dri2_minor = 0; +#if DRI2INFOREC_VERSION >= 4 + const char *driverNames[1]; +#endif + + DBG(("%s()\n", __FUNCTION__)); + + if (sna->kgem.wedged) { + xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING, + "cannot enable DRI2 whilst forcing software fallbacks\n"); + return FALSE; + } + + if (xf86LoaderCheckSymbol("DRI2Version")) + DRI2Version(&dri2_major, &dri2_minor); + + if (dri2_minor < 1) { + xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING, + "DRI2 requires DRI2 module version 1.1.0 or later\n"); + return FALSE; + } + + if (serverGeneration != dri2_server_generation) { + dri2_server_generation = serverGeneration; + if (!sna_dri2_register_frame_event_resource_types()) { + xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING, + "Cannot register DRI2 frame event resources\n"); + return FALSE; + } + } + sna->deviceName = drmGetDeviceNameFromFd(sna->kgem.fd); + memset(&info, '\0', sizeof(info)); + info.fd = sna->kgem.fd; + info.driverName = sna->kgem.gen < 40 ? 
"i915" : "i965"; + info.deviceName = sna->deviceName; + + DBG(("%s: loading dri driver '%s' [gen=%d] for device '%s'\n", + __FUNCTION__, info.driverName, sna->kgem.gen, info.deviceName)); + +#if DRI2INFOREC_VERSION == 1 + info.version = 1; + info.CreateBuffers = sna_dri2_create_buffers; + info.DestroyBuffers = sna_dri2_destroy_buffers; +#elif DRI2INFOREC_VERSION == 2 + /* The ABI between 2 and 3 was broken so we could get rid of + * the multi-buffer alloc functions. Make sure we indicate the + * right version so DRI2 can reject us if it's version 3 or above. */ + info.version = 2; + info.CreateBuffer = sna_dri2_create_buffer; + info.DestroyBuffer = sna_dri2_destroy_buffer; +#else + info.version = 3; + info.CreateBuffer = sna_dri2_create_buffer; + info.DestroyBuffer = sna_dri2_destroy_buffer; +#endif + + info.CopyRegion = sna_dri2_copy_region; +#if DRI2INFOREC_VERSION >= 4 + { + info.version = 4; + info.ScheduleSwap = sna_dri2_schedule_swap; + info.GetMSC = sna_dri2_get_msc; + info.ScheduleWaitMSC = sna_dri2_schedule_wait_msc; + info.numDrivers = 1; + info.driverNames = driverNames; + driverNames[0] = info.driverName; +#if DRI2INFOREC_VERSION >= 6 + info.version = 6; + info.AsyncSwap = sna_dri2_async_swap; +#endif + } +#endif + + return DRI2ScreenInit(screen, &info); +} + +void sna_dri2_close(struct sna *sna, ScreenPtr screen) +{ + DBG(("%s()\n", __FUNCTION__)); + DRI2CloseScreen(screen); + drmFree(sna->deviceName); +} diff --git a/src/sna/sna_driver.c b/src/sna/sna_driver.c new file mode 100644 index 00000000..b0df9aa5 --- /dev/null +++ b/src/sna/sna_driver.c @@ -0,0 +1,925 @@ +/************************************************************************** + +Copyright 2001 VA Linux Systems Inc., Fremont, California. +Copyright © 2002 by David Dawes + +All Rights Reserved. 
+ +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the "Software"), +to deal in the Software without restriction, including without limitation +on the rights to use, copy, modify, merge, publish, distribute, sub +license, and/or sell copies of the Software, and to permit persons to whom +the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice (including the next +paragraph) shall be included in all copies or substantial portions of the +Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL +THE COPYRIGHT HOLDERS AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE +USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +**************************************************************************/ + +/* + * Authors: Jeff Hartmann <jhartmann@valinux.com> + * Abraham van der Merwe <abraham@2d3d.co.za> + * David Dawes <dawes@xfree86.org> + * Alan Hourihane <alanh@tungstengraphics.com> + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <assert.h> +#include <string.h> +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <stdio.h> +#include <errno.h> + +#include "xf86.h" +#include "xf86_OSproc.h" +#include "xf86Priv.h" +#include "xf86cmap.h" +#include "compiler.h" +#include "mibstore.h" +#include "vgaHW.h" +#include "mipointer.h" +#include "micmap.h" +#include "shadowfb.h" +#include <X11/extensions/randr.h> +#include "fb.h" +#include "miscstruct.h" +#include "dixstruct.h" +#include "xf86xv.h" +#include <X11/extensions/Xv.h> +#include "sna.h" +#include "sna_module.h" +#include "sna_video.h" + +#include "intel_driver.h" + +#include <sys/ioctl.h> +#include "i915_drm.h" + +static OptionInfoRec sna_options[] = { + {OPTION_TILING_FB, "LinearFramebuffer", OPTV_BOOLEAN, {0}, FALSE}, + {OPTION_TILING_2D, "Tiling", OPTV_BOOLEAN, {0}, TRUE}, + {OPTION_PREFER_OVERLAY, "XvPreferOverlay", OPTV_BOOLEAN, {0}, FALSE}, + {OPTION_COLOR_KEY, "ColorKey", OPTV_INTEGER, {0}, FALSE}, + {OPTION_VIDEO_KEY, "VideoKey", OPTV_INTEGER, {0}, FALSE}, + {OPTION_SWAPBUFFERS_WAIT, "SwapbuffersWait", OPTV_BOOLEAN, {0}, TRUE}, + {OPTION_HOTPLUG, "HotPlug", OPTV_BOOLEAN, {0}, TRUE}, + {OPTION_THROTTLE, "Throttle", OPTV_BOOLEAN, {0}, TRUE}, + {OPTION_RELAXED_FENCING, "UseRelaxedFencing", OPTV_BOOLEAN, {0}, TRUE}, + {OPTION_VMAP, "UseVmap", OPTV_BOOLEAN, {0}, TRUE}, + {-1, NULL, OPTV_NONE, {0}, FALSE} +}; + +static Bool sna_enter_vt(int scrnIndex, int flags); + +/* temporary */ +extern void xf86SetCursor(ScreenPtr screen, CursorPtr pCurs, int x, int y); + +const OptionInfoRec *sna_available_options(int chipid, int busid) +{ + return sna_options; +} + +static void 
+sna_load_palette(ScrnInfoPtr scrn, int numColors, int *indices, + LOCO * colors, VisualPtr pVisual) +{ + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + int i, j, index; + int p; + uint16_t lut_r[256], lut_g[256], lut_b[256]; + + for (p = 0; p < xf86_config->num_crtc; p++) { + xf86CrtcPtr crtc = xf86_config->crtc[p]; + + switch (scrn->depth) { + case 15: + for (i = 0; i < numColors; i++) { + index = indices[i]; + for (j = 0; j < 8; j++) { + lut_r[index * 8 + j] = + colors[index].red << 8; + lut_g[index * 8 + j] = + colors[index].green << 8; + lut_b[index * 8 + j] = + colors[index].blue << 8; + } + } + break; + case 16: + for (i = 0; i < numColors; i++) { + index = indices[i]; + + if (index <= 31) { + for (j = 0; j < 8; j++) { + lut_r[index * 8 + j] = + colors[index].red << 8; + lut_b[index * 8 + j] = + colors[index].blue << 8; + } + } + + for (j = 0; j < 4; j++) { + lut_g[index * 4 + j] = + colors[index].green << 8; + } + } + break; + default: + for (i = 0; i < numColors; i++) { + index = indices[i]; + lut_r[index] = colors[index].red << 8; + lut_g[index] = colors[index].green << 8; + lut_b[index] = colors[index].blue << 8; + } + break; + } + + /* Make the change through RandR */ +#ifdef RANDR_12_INTERFACE + RRCrtcGammaSet(crtc->randr_crtc, lut_r, lut_g, lut_b); +#else + crtc->funcs->gamma_set(crtc, lut_r, lut_g, lut_b, 256); +#endif + } +} + +/** + * Adjust the screen pixmap for the current location of the front buffer. + * This is done at EnterVT when buffers are bound as long as the resources + * have already been created, but the first EnterVT happens before + * CreateScreenResources. 
+ */ +static Bool sna_create_screen_resources(ScreenPtr screen) +{ + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; + struct sna *sna = to_sna(scrn); + + free(screen->devPrivate); + screen->devPrivate = NULL; + + sna->front = screen->CreatePixmap(screen, + screen->width, + screen->height, + screen->rootDepth, + SNA_CREATE_FB); + if (!sna->front) + return FALSE; + + if (!sna_pixmap_force_to_gpu(sna->front)) + goto cleanup_front; + + screen->SetScreenPixmap(sna->front); + + if (!sna_accel_create(sna)) + goto cleanup_front; + + if (!sna_enter_vt(screen->myNum, 0)) + goto cleanup_front; + + return TRUE; + +cleanup_front: + screen->DestroyPixmap(sna->front); + sna->front = NULL; + return FALSE; +} + +static void PreInitCleanup(ScrnInfoPtr scrn) +{ + if (!scrn || !scrn->driverPrivate) + return; + + free(scrn->driverPrivate); + scrn->driverPrivate = NULL; +} + +static void sna_check_chipset_option(ScrnInfoPtr scrn) +{ + struct sna *sna = to_sna(scrn); + MessageType from = X_PROBED; + + intel_detect_chipset(scrn, sna->PciInfo, &sna->chipset); + + /* Set the Chipset and ChipRev, allowing config file entries to override. */ + if (sna->pEnt->device->chipset && *sna->pEnt->device->chipset) { + scrn->chipset = sna->pEnt->device->chipset; + from = X_CONFIG; + } else if (sna->pEnt->device->chipID >= 0) { + scrn->chipset = (char *)xf86TokenToString(intel_chipsets, + sna->pEnt->device->chipID); + from = X_CONFIG; + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, + "ChipID override: 0x%04X\n", + sna->pEnt->device->chipID); + DEVICE_ID(sna->PciInfo) = sna->pEnt->device->chipID; + } else { + from = X_PROBED; + scrn->chipset = (char *)xf86TokenToString(intel_chipsets, + DEVICE_ID(sna->PciInfo)); + } + + if (sna->pEnt->device->chipRev >= 0) { + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "ChipRev override: %d\n", + sna->pEnt->device->chipRev); + } + + xf86DrvMsg(scrn->scrnIndex, from, "Chipset: \"%s\"\n", + (scrn->chipset != NULL) ? 
scrn->chipset : "Unknown i8xx"); +} + +static Bool sna_get_early_options(ScrnInfoPtr scrn) +{ + struct sna *sna = to_sna(scrn); + + /* Process the options */ + xf86CollectOptions(scrn, NULL); + if (!(sna->Options = malloc(sizeof(sna_options)))) + return FALSE; + + memcpy(sna->Options, sna_options, sizeof(sna_options)); + xf86ProcessOptions(scrn->scrnIndex, scrn->options, sna->Options); + + return TRUE; +} + +static int sna_open_drm_master(ScrnInfoPtr scrn) +{ + struct sna *sna = to_sna(scrn); + struct pci_device *dev = sna->PciInfo; + drmSetVersion sv; + struct drm_i915_getparam gp; + int err, val; + char busid[20]; + int fd; + + snprintf(busid, sizeof(busid), "pci:%04x:%02x:%02x.%d", + dev->domain, dev->bus, dev->dev, dev->func); + + fd = drmOpen("i915", busid); + if (fd == -1) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "[drm] Failed to open DRM device for %s: %s\n", + busid, strerror(errno)); + return -1; + } + + /* Check that what we opened was a master or a master-capable FD, + * by setting the version of the interface we'll use to talk to it. + * (see DRIOpenDRMMaster() in DRI1) + */ + sv.drm_di_major = 1; + sv.drm_di_minor = 1; + sv.drm_dd_major = -1; + sv.drm_dd_minor = -1; + err = drmSetInterfaceVersion(fd, &sv); + if (err != 0) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "[drm] failed to set drm interface version.\n"); + drmClose(fd); + return -1; + } + + val = FALSE; + gp.param = I915_PARAM_HAS_BLT; + gp.value = &val; + if (drmCommandWriteRead(fd, DRM_I915_GETPARAM, + &gp, sizeof(gp))) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Failed to detect BLT. Kernel 2.6.37 required.\n"); + drmClose(fd); + return -1; + } + + return fd; +} + +static void sna_close_drm_master(struct sna *sna) +{ + if (sna && sna->kgem.fd > 0) { + drmClose(sna->kgem.fd); + sna->kgem.fd = -1; + } +} + +static void sna_selftest(void) +{ + sna_damage_selftest(); +} + + +/** + * This is called before ScreenInit to do any require probing of screen + * configuration. 
+ * + * This code generally covers probing, module loading, option handling + * card mapping, and RandR setup. + * + * Since xf86InitialConfiguration ends up requiring that we set video modes + * in order to detect configuration, we end up having to do a lot of driver + * setup (talking to the DRM, mapping the device, etc.) in this function. + * As a result, we want to set up that server initialization once rather + * that doing it per generation. + */ +static Bool sna_pre_init(ScrnInfoPtr scrn, int flags) +{ + struct sna *sna; + rgb defaultWeight = { 0, 0, 0 }; + EntityInfoPtr pEnt; + int flags24; + Gamma zeros = { 0.0, 0.0, 0.0 }; + int fd; + + sna_selftest(); + + if (scrn->numEntities != 1) + return FALSE; + + pEnt = xf86GetEntityInfo(scrn->entityList[0]); + + if (flags & PROBE_DETECT) + return TRUE; + + sna = to_sna(scrn); + if (sna == NULL) { + sna = xnfcalloc(sizeof(struct sna), 1); + if (sna == NULL) + return FALSE; + + scrn->driverPrivate = sna; + } + sna->scrn = scrn; + sna->pEnt = pEnt; + + scrn->displayWidth = 640; /* default it */ + + if (sna->pEnt->location.type != BUS_PCI) + return FALSE; + + sna->PciInfo = xf86GetPciInfoForEntity(sna->pEnt->index); + + fd = sna_open_drm_master(scrn); + if (fd == -1) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Failed to become DRM master.\n"); + return FALSE; + } + + scrn->monitor = scrn->confScreen->monitor; + scrn->progClock = TRUE; + scrn->rgbBits = 8; + + flags24 = Support32bppFb | PreferConvert24to32 | SupportConvert24to32; + + if (!xf86SetDepthBpp(scrn, 0, 0, 0, flags24)) + return FALSE; + + switch (scrn->depth) { + case 8: + case 15: + case 16: + case 24: + break; + default: + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Given depth (%d) is not supported by Intel driver\n", + scrn->depth); + return FALSE; + } + xf86PrintDepthBpp(scrn); + + if (!xf86SetWeight(scrn, defaultWeight, defaultWeight)) + return FALSE; + if (!xf86SetDefaultVisual(scrn, -1)) + return FALSE; + + sna->mode.cpp = scrn->bitsPerPixel / 8; + + if 
(!sna_get_early_options(scrn)) + return FALSE; + + sna_check_chipset_option(scrn); + kgem_init(&sna->kgem, fd, sna->chipset.info->gen); + if (!xf86ReturnOptValBool(sna->Options, + OPTION_RELAXED_FENCING, + sna->kgem.has_relaxed_fencing)) { + xf86DrvMsg(scrn->scrnIndex, + sna->kgem.has_relaxed_fencing ? X_CONFIG : X_PROBED, + "Disabling use of relaxed fencing\n"); + sna->kgem.has_relaxed_fencing = 0; + } + if (!xf86ReturnOptValBool(sna->Options, + OPTION_VMAP, + sna->kgem.has_vmap)) { + xf86DrvMsg(scrn->scrnIndex, + sna->kgem.has_vmap ? X_CONFIG : X_PROBED, + "Disabling use of vmap\n"); + sna->kgem.has_vmap = 0; + } + + /* Enable tiling by default */ + sna->tiling = SNA_TILING_ALL; + + /* Allow user override if they set a value */ + if (!xf86ReturnOptValBool(sna->Options, OPTION_TILING_2D, TRUE)) + sna->tiling &= ~SNA_TILING_2D; + if (xf86ReturnOptValBool(sna->Options, OPTION_TILING_FB, FALSE)) + sna->tiling &= ~SNA_TILING_FB; + + sna->flags = 0; + if (!xf86ReturnOptValBool(sna->Options, OPTION_THROTTLE, TRUE)) + sna->flags |= SNA_NO_THROTTLE; + if (xf86ReturnOptValBool(sna->Options, OPTION_SWAPBUFFERS_WAIT, TRUE)) + sna->flags |= SNA_SWAP_WAIT; + + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Framebuffer %s\n", + sna->tiling & SNA_TILING_FB ? "tiled" : "linear"); + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Pixmaps %s\n", + sna->tiling & SNA_TILING_2D ? "tiled" : "linear"); + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "3D buffers %s\n", + sna->tiling & SNA_TILING_3D ? "tiled" : "linear"); + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "SwapBuffers wait %sabled\n", + sna->flags & SNA_SWAP_WAIT ? "en" : "dis"); + xf86DrvMsg(scrn->scrnIndex, X_CONFIG, "Throttling %sabled\n", + sna->flags & SNA_NO_THROTTLE ? 
"dis" : "en"); + + if (!sna_mode_pre_init(scrn, sna)) { + PreInitCleanup(scrn); + return FALSE; + } + + if (!xf86SetGamma(scrn, zeros)) { + PreInitCleanup(scrn); + return FALSE; + } + + if (scrn->modes == NULL) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, "No modes.\n"); + PreInitCleanup(scrn); + return FALSE; + } + scrn->currentMode = scrn->modes; + + /* Set display resolution */ + xf86SetDpi(scrn, 0, 0); + + /* Load the required sub modules */ + if (!xf86LoadSubModule(scrn, "fb")) { + PreInitCleanup(scrn); + return FALSE; + } + + /* Load the dri2 module if requested. */ + xf86LoadSubModule(scrn, "dri2"); + + return sna_accel_pre_init(sna); +} + +/** + * Intialiazes the hardware for the 3D pipeline use in the 2D driver. + * + * Some state caching is performed to avoid redundant state emits. This + * function is also responsible for marking the state as clobbered for DRI + * clients. + */ +static void +sna_block_handler(int i, pointer data, pointer timeout, pointer read_mask) +{ + ScreenPtr screen = screenInfo.screens[i]; + ScrnInfoPtr scrn = xf86Screens[i]; + struct sna *sna = to_sna(scrn); + + screen->BlockHandler = sna->BlockHandler; + + (*screen->BlockHandler) (i, data, timeout, read_mask); + + sna->BlockHandler = screen->BlockHandler; + screen->BlockHandler = sna_block_handler; + + sna_accel_block_handler(sna); +} + +static void +sna_wakeup_handler(int i, pointer data, unsigned long result, pointer read_mask) +{ + ScreenPtr screen = screenInfo.screens[i]; + ScrnInfoPtr scrn = xf86Screens[i]; + struct sna *sna = to_sna(scrn); + + screen->WakeupHandler = sna->WakeupHandler; + + (*screen->WakeupHandler) (i, data, result, read_mask); + + sna->WakeupHandler = screen->WakeupHandler; + screen->WakeupHandler = sna_wakeup_handler; + + sna_accel_wakeup_handler(sna); +} + +#if HAVE_UDEV +static void +sna_handle_uevents(int fd, void *closure) +{ + ScrnInfoPtr scrn = closure; + struct sna *sna = to_sna(scrn); + struct udev_device *dev; + const char *hotplug; + struct stat s; + 
dev_t udev_devnum; + + dev = udev_monitor_receive_device(sna->uevent_monitor); + if (!dev) + return; + + udev_devnum = udev_device_get_devnum(dev); + fstat(sna->kgem.fd, &s); + /* + * Check to make sure this event is directed at our + * device (by comparing dev_t values), then make + * sure it's a hotplug event (HOTPLUG=1) + */ + + hotplug = udev_device_get_property_value(dev, "HOTPLUG"); + + if (memcmp(&s.st_rdev, &udev_devnum, sizeof (dev_t)) == 0 && + hotplug && atoi(hotplug) == 1) + RRGetInfo(screenInfo.screens[scrn->scrnIndex], TRUE); + + udev_device_unref(dev); +} + +static void +sna_uevent_init(ScrnInfoPtr scrn) +{ + struct sna *sna = to_sna(scrn); + struct udev *u; + struct udev_monitor *mon; + Bool hotplug; + MessageType from = X_CONFIG; + + if (!xf86GetOptValBool(sna->Options, OPTION_HOTPLUG, &hotplug)) { + from = X_DEFAULT; + hotplug = TRUE; + } + + xf86DrvMsg(scrn->scrnIndex, from, "hotplug detection: \"%s\"\n", + hotplug ? "enabled" : "disabled"); + if (!hotplug) + return; + + u = udev_new(); + if (!u) + return; + + mon = udev_monitor_new_from_netlink(u, "udev"); + + if (!mon) { + udev_unref(u); + return; + } + + if (udev_monitor_filter_add_match_subsystem_devtype(mon, + "drm", + "drm_minor") < 0 || + udev_monitor_enable_receiving(mon) < 0) + { + udev_monitor_unref(mon); + udev_unref(u); + return; + } + + sna->uevent_handler = + xf86AddGeneralHandler(udev_monitor_get_fd(mon), + sna_handle_uevents, + scrn); + if (!sna->uevent_handler) { + udev_monitor_unref(mon); + udev_unref(u); + return; + } + + sna->uevent_monitor = mon; +} + +static void +sna_uevent_fini(ScrnInfoPtr scrn) +{ + struct sna *sna = to_sna(scrn); + + if (sna->uevent_handler) { + struct udev *u = udev_monitor_get_udev(sna->uevent_monitor); + + xf86RemoveGeneralHandler(sna->uevent_handler); + + udev_monitor_unref(sna->uevent_monitor); + udev_unref(u); + sna->uevent_handler = NULL; + sna->uevent_monitor = NULL; + } +} +#endif /* HAVE_UDEV */ + +static void sna_leave_vt(int scrnIndex, int 
flags) +{ + ScrnInfoPtr scrn = xf86Screens[scrnIndex]; + struct sna *sna = to_sna(scrn); + int ret; + + xf86RotateFreeShadow(scrn); + + xf86_hide_cursors(scrn); + + ret = drmDropMaster(sna->kgem.fd); + if (ret) + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "drmDropMaster failed: %s\n", strerror(errno)); +} + + +static Bool sna_close_screen(int scrnIndex, ScreenPtr screen) +{ + ScrnInfoPtr scrn = xf86Screens[scrnIndex]; + struct sna *sna = to_sna(scrn); + +#if HAVE_UDEV + sna_uevent_fini(scrn); +#endif + + if (scrn->vtSema == TRUE) + sna_leave_vt(scrnIndex, 0); + + sna_accel_close(sna); + + xf86_cursors_fini(screen); + + screen->CloseScreen = sna->CloseScreen; + (*screen->CloseScreen) (scrnIndex, screen); + + if (sna->directRenderingOpen) { + sna_dri2_close(sna, screen); + sna->directRenderingOpen = FALSE; + } + + xf86GARTCloseScreen(scrnIndex); + + scrn->vtSema = FALSE; + return TRUE; +} + +static Bool +sna_screen_init(int scrnIndex, ScreenPtr screen, int argc, char **argv) +{ + ScrnInfoPtr scrn = xf86Screens[screen->myNum]; + struct sna *sna = to_sna(scrn); + VisualPtr visual; + struct pci_device *const device = sna->PciInfo; + + scrn->videoRam = device->regions[2].size / 1024; + +#ifdef DRI2 + sna->directRenderingOpen = sna_dri2_open(sna, screen); + if (sna->directRenderingOpen) + xf86DrvMsg(scrn->scrnIndex, X_INFO, + "direct rendering: DRI2 Enabled\n"); +#endif + + miClearVisualTypes(); + if (!miSetVisualTypes(scrn->depth, + miGetDefaultVisualMask(scrn->depth), + scrn->rgbBits, scrn->defaultVisual)) + return FALSE; + if (!miSetPixmapDepths()) + return FALSE; + + if (!fbScreenInit(screen, NULL, + scrn->virtualX, scrn->virtualY, + scrn->xDpi, scrn->yDpi, + scrn->displayWidth, scrn->bitsPerPixel)) + return FALSE; + + if (scrn->bitsPerPixel > 8) { + /* Fixup RGB ordering */ + visual = screen->visuals + screen->numVisuals; + while (--visual >= screen->visuals) { + if ((visual->class | DynamicClass) == DirectColor) { + visual->offsetRed = scrn->offset.red; + 
visual->offsetGreen = scrn->offset.green; + visual->offsetBlue = scrn->offset.blue; + visual->redMask = scrn->mask.red; + visual->greenMask = scrn->mask.green; + visual->blueMask = scrn->mask.blue; + } + } + } + + fbPictureInit(screen, NULL, 0); + + xf86SetBlackWhitePixels(screen); + + if (!sna_accel_init(screen, sna)) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Hardware acceleration initialization failed\n"); + return FALSE; + } + + miInitializeBackingStore(screen); + xf86SetBackingStore(screen); + xf86SetSilkenMouse(screen); + miDCInitialize(screen, xf86GetPointerScreenFuncs()); + + xf86DrvMsg(scrn->scrnIndex, X_INFO, "Initializing HW Cursor\n"); + if (!xf86_cursors_init(screen, SNA_CURSOR_X, SNA_CURSOR_Y, + HARDWARE_CURSOR_TRUECOLOR_AT_8BPP | + HARDWARE_CURSOR_BIT_ORDER_MSBFIRST | + HARDWARE_CURSOR_INVERT_MASK | + HARDWARE_CURSOR_SWAP_SOURCE_AND_MASK | + HARDWARE_CURSOR_AND_SOURCE_WITH_MASK | + HARDWARE_CURSOR_SOURCE_MASK_INTERLEAVE_64 | + HARDWARE_CURSOR_UPDATE_UNHIDDEN | + HARDWARE_CURSOR_ARGB)) { + xf86DrvMsg(scrn->scrnIndex, X_ERROR, + "Hardware cursor initialization failed\n"); + } + + /* Must force it before EnterVT, so we are in control of VT and + * later memory should be bound when allocating, e.g rotate_mem */ + scrn->vtSema = TRUE; + + sna->BlockHandler = screen->BlockHandler; + screen->BlockHandler = sna_block_handler; + + sna->WakeupHandler = screen->WakeupHandler; + screen->WakeupHandler = sna_wakeup_handler; + + screen->SaveScreen = xf86SaveScreen; + sna->CloseScreen = screen->CloseScreen; + screen->CloseScreen = sna_close_screen; + screen->CreateScreenResources = sna_create_screen_resources; + + if (!xf86CrtcScreenInit(screen)) + return FALSE; + + if (!miCreateDefColormap(screen)) + return FALSE; + + if (!xf86HandleColormaps(screen, 256, 8, sna_load_palette, NULL, + CMAP_RELOAD_ON_MODE_SWITCH | + CMAP_PALETTED_TRUECOLOR)) { + return FALSE; + } + + xf86DPMSInit(screen, xf86DPMSSet, 0); + + sna_video_init(sna, screen); + + if (serverGeneration == 
1) + xf86ShowUnusedOptions(scrn->scrnIndex, scrn->options); + + sna_mode_init(sna); + + sna->suspended = FALSE; + +#if HAVE_UDEV + sna_uevent_init(scrn); +#endif + + return TRUE; +} + +static void sna_adjust_frame(int scrnIndex, int x, int y, int flags) +{ +} + +static void sna_free_screen(int scrnIndex, int flags) +{ + ScrnInfoPtr scrn = xf86Screens[scrnIndex]; + struct sna *sna = to_sna(scrn); + + if (sna) { + sna_mode_fini(sna); + sna_close_drm_master(sna); + + free(sna); + scrn->driverPrivate = NULL; + } + + if (xf86LoaderCheckSymbol("vgaHWFreeHWRec")) + vgaHWFreeHWRec(xf86Screens[scrnIndex]); +} + +/* + * This gets called when gaining control of the VT, and from ScreenInit(). + */ +static Bool sna_enter_vt(int scrnIndex, int flags) +{ + ScrnInfoPtr scrn = xf86Screens[scrnIndex]; + struct sna *sna = to_sna(scrn); + + if (drmSetMaster(sna->kgem.fd)) { + xf86DrvMsg(scrn->scrnIndex, X_WARNING, + "drmSetMaster failed: %s\n", + strerror(errno)); + } + + return xf86SetDesiredModes(scrn); +} + +static Bool sna_switch_mode(int scrnIndex, DisplayModePtr mode, int flags) +{ + return xf86SetSingleMode(xf86Screens[scrnIndex], mode, RR_Rotate_0); +} + +static ModeStatus +sna_valid_mode(int scrnIndex, DisplayModePtr mode, Bool verbose, int flags) +{ + return MODE_OK; +} + +#ifndef SUSPEND_SLEEP +#define SUSPEND_SLEEP 0 +#endif +#ifndef RESUME_SLEEP +#define RESUME_SLEEP 0 +#endif + +/* + * This function is only required if we need to do anything differently from + * DoApmEvent() in common/xf86PM.c, including if we want to see events other + * than suspend/resume. + */ +static Bool sna_pm_event(int scrnIndex, pmEvent event, Bool undo) +{ + ScrnInfoPtr scrn = xf86Screens[scrnIndex]; + struct sna *sna = to_sna(scrn); + + switch (event) { + case XF86_APM_SYS_SUSPEND: + case XF86_APM_CRITICAL_SUSPEND: /*do we want to delay a critical suspend? 
*/ + case XF86_APM_USER_SUSPEND: + case XF86_APM_SYS_STANDBY: + case XF86_APM_USER_STANDBY: + if (!undo && !sna->suspended) { + scrn->LeaveVT(scrnIndex, 0); + sna->suspended = TRUE; + sleep(SUSPEND_SLEEP); + } else if (undo && sna->suspended) { + sleep(RESUME_SLEEP); + scrn->EnterVT(scrnIndex, 0); + sna->suspended = FALSE; + } + break; + case XF86_APM_STANDBY_RESUME: + case XF86_APM_NORMAL_RESUME: + case XF86_APM_CRITICAL_RESUME: + if (sna->suspended) { + sleep(RESUME_SLEEP); + scrn->EnterVT(scrnIndex, 0); + sna->suspended = FALSE; + /* + * Turn the screen saver off when resuming. This seems to be + * needed to stop xscreensaver kicking in (when used). + * + * XXX DoApmEvent() should probably call this just like + * xf86VTSwitch() does. Maybe do it here only in 4.2 + * compatibility mode. + */ + SaveScreens(SCREEN_SAVER_FORCER, ScreenSaverReset); + } + break; + /* This is currently used for ACPI */ + case XF86_APM_CAPABILITY_CHANGED: + SaveScreens(SCREEN_SAVER_FORCER, ScreenSaverReset); + break; + + default: + ErrorF("sna_pm_event: received APM event %d\n", event); + } + return TRUE; +} + +void sna_init_scrn(ScrnInfoPtr scrn) +{ + scrn->PreInit = sna_pre_init; + scrn->ScreenInit = sna_screen_init; + scrn->SwitchMode = sna_switch_mode; + scrn->AdjustFrame = sna_adjust_frame; + scrn->EnterVT = sna_enter_vt; + scrn->LeaveVT = sna_leave_vt; + scrn->FreeScreen = sna_free_screen; + scrn->ValidMode = sna_valid_mode; + scrn->PMEvent = sna_pm_event; +} diff --git a/src/sna/sna_glyphs.c b/src/sna/sna_glyphs.c new file mode 100644 index 00000000..bb4b9cde --- /dev/null +++ b/src/sna/sna_glyphs.c @@ -0,0 +1,1145 @@ +/* + * Copyright © 2010 Intel Corporation + * Partly based on code Copyright © 2008 Red Hat, Inc. + * Partly based on code Copyright © 2000 SuSE, Inc. 
+ * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Intel not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. Intel makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * INTEL DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL INTEL + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of Red Hat not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. Red Hat makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. 
+ * + * Red Hat DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL Red Hat + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + * + * Permission to use, copy, modify, distribute, and sell this software and its + * documentation for any purpose is hereby granted without fee, provided that + * the above copyright notice appear in all copies and that both that + * copyright notice and this permission notice appear in supporting + * documentation, and that the name of SuSE not be used in advertising or + * publicity pertaining to distribution of the software without specific, + * written prior permission. SuSE makes no representations about the + * suitability of this software for any purpose. It is provided "as is" + * without express or implied warranty. + * + * SuSE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, INCLUDING ALL + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS, IN NO EVENT SHALL SuSE + * BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION + * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN + * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 
+ * + * Author: Chris Wilson <chris@chris-wilson.co.uk> + * Based on code by: Keith Packard <keithp@keithp.com> and Owen Taylor <otaylor@fishsoup.net> + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_render.h" +#include "sna_render_inline.h" + +#include <mipict.h> +#include <fbpict.h> +#include <fb.h> + +#if DEBUG_GLYPHS +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +#define CACHE_PICTURE_SIZE 1024 +#define GLYPH_MIN_SIZE 8 +#define GLYPH_MAX_SIZE 64 +#define GLYPH_CACHE_SIZE (CACHE_PICTURE_SIZE * CACHE_PICTURE_SIZE / (GLYPH_MIN_SIZE * GLYPH_MIN_SIZE)) + +#if DEBUG_GLYPHS +static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function) +{ + if (box->x1 < 0 || box->y1 < 0 || + box->x2 > pixmap->drawable.width || + box->y2 > pixmap->drawable.height) + { + ErrorF("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, + pixmap->drawable.width, + pixmap->drawable.height); + assert(0); + } +} +#define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__) +#else +#define assert_pixmap_contains_box(p, b) +#endif + +struct sna_glyph { + PicturePtr atlas; + struct sna_coordinate coordinate; + uint16_t size, pos; +}; + +static DevPrivateKeyRec sna_glyph_key; + +static inline struct sna_glyph *glyph_get_private(GlyphPtr glyph) +{ + return dixGetPrivateAddr(&glyph->devPrivates, &sna_glyph_key); +} + +#define NeedsComponent(f) (PICT_FORMAT_A(f) != 0 && PICT_FORMAT_RGB(f) != 0) + +static void unrealize_glyph_caches(struct sna *sna) +{ + struct sna_render *render = &sna->render; + int i; + + DBG(("%s\n", __FUNCTION__)); + + for (i = 0; i < ARRAY_SIZE(render->glyph); i++) { + struct sna_glyph_cache *cache = &render->glyph[i]; + + if (cache->picture) + FreePicture(cache->picture, 0); + + free(cache->glyphs); + } + memset(render->glyph, 0, sizeof(render->glyph)); +} + +/* All 
caches for a single format share a single pixmap for glyph storage, + * allowing mixing glyphs of different sizes without paying a penalty + * for switching between source pixmaps. (Note that for a size of font + * right at the border between two sizes, we might be switching for almost + * every glyph.) + * + * This function allocates the storage pixmap, and then fills in the + * rest of the allocated structures for all caches with the given format. + */ +static Bool realize_glyph_caches(struct sna *sna) +{ + ScreenPtr screen = sna->scrn->pScreen; + unsigned int formats[] = { + PIXMAN_a8, + PIXMAN_a8r8g8b8, + }; + int i; + + DBG(("%s\n", __FUNCTION__)); + + for (i = 0; i < ARRAY_SIZE(formats); i++) { + struct sna_glyph_cache *cache = &sna->render.glyph[i]; + PixmapPtr pixmap; + PicturePtr picture; + CARD32 component_alpha; + int depth = PIXMAN_FORMAT_DEPTH(formats[i]); + int error; + PictFormatPtr pPictFormat = PictureMatchFormat(screen, depth, formats[i]); + if (!pPictFormat) + goto bail; + + /* Now allocate the pixmap and picture */ + pixmap = screen->CreatePixmap(screen, + CACHE_PICTURE_SIZE, + CACHE_PICTURE_SIZE, + depth, + CREATE_PIXMAP_USAGE_SCRATCH); + if (!pixmap) + goto bail; + + component_alpha = NeedsComponent(pPictFormat->format); + picture = CreatePicture(0, &pixmap->drawable, pPictFormat, + CPComponentAlpha, &component_alpha, + serverClient, &error); + + screen->DestroyPixmap(pixmap); + + if (!picture) + goto bail; + + ValidatePicture(picture); + + cache->count = cache->evict = 0; + cache->picture = picture; + cache->glyphs = calloc(sizeof(struct sna_glyph *), + GLYPH_CACHE_SIZE); + if (!cache->glyphs) + goto bail; + + cache->evict = rand() % GLYPH_CACHE_SIZE; + } + + return TRUE; + +bail: + unrealize_glyph_caches(sna); + return FALSE; +} + +static void +glyph_cache_upload(ScreenPtr screen, + struct sna_glyph_cache *cache, + GlyphPtr glyph, + int16_t x, int16_t y) +{ + DBG(("%s: upload glyph %p to cache (%d, %d)x(%d, %d)\n", + __FUNCTION__, glyph, x, 
y, glyph->info.width, glyph->info.height)); + sna_composite(PictOpSrc, + GlyphPicture(glyph)[screen->myNum], 0, cache->picture, + 0, 0, + 0, 0, + x, y, + glyph->info.width, + glyph->info.height); +} + +static void +glyph_extents(int nlist, + GlyphListPtr list, + GlyphPtr *glyphs, + BoxPtr extents) +{ + int16_t x1, x2, y1, y2; + int16_t x, y; + + x1 = y1 = MAXSHORT; + x2 = y2 = MINSHORT; + x = y = 0; + while (nlist--) { + int n = list->len; + x += list->xOff; + y += list->yOff; + list++; + while (n--) { + GlyphPtr glyph = *glyphs++; + + if (glyph->info.width && glyph->info.height) { + int v; + + v = x - glyph->info.x; + if (v < x1) + x1 = v; + v += glyph->info.width; + if (v > x2) + x2 = v; + + v = y - glyph->info.y; + if (v < y1) + y1 = v; + v += glyph->info.height; + if (v > y2) + y2 = v; + } + + x += glyph->info.xOff; + y += glyph->info.yOff; + } + } + + extents->x1 = x1; + extents->x2 = x2; + extents->y1 = y1; + extents->y2 = y2; +} + +static inline unsigned int +glyph_size_to_count(int size) +{ + size /= GLYPH_MIN_SIZE; + return size * size; +} + +static inline unsigned int +glyph_count_to_mask(int count) +{ + return ~(count - 1); +} + +static inline unsigned int +glyph_size_to_mask(int size) +{ + return glyph_count_to_mask(glyph_size_to_count(size)); +} + +static int +glyph_cache(ScreenPtr screen, + struct sna_render *render, + GlyphPtr glyph) +{ + PicturePtr glyph_picture = GlyphPicture(glyph)[screen->myNum]; + struct sna_glyph_cache *cache = &render->glyph[PICT_FORMAT_RGB(glyph_picture->format) != 0]; + struct sna_glyph *priv; + int size, mask, pos, s; + + if (glyph->info.width > GLYPH_MAX_SIZE || + glyph->info.height > GLYPH_MAX_SIZE) + return FALSE; + + for (size = GLYPH_MIN_SIZE; size <= GLYPH_MAX_SIZE; size *= 2) + if (glyph->info.width <= size && glyph->info.height <= size) + break; + + s = glyph_size_to_count(size); + mask = glyph_count_to_mask(s); + pos = (cache->count + s - 1) & mask; + if (pos < GLYPH_CACHE_SIZE) { + cache->count = pos + s; + } else 
{ + priv = NULL; + for (s = size; s <= GLYPH_MAX_SIZE; s *= 2) { + int i = cache->evict & glyph_size_to_mask(s); + priv = cache->glyphs[i]; + if (priv == NULL) + continue; + + if (priv->size >= s) { + cache->glyphs[i] = NULL; + priv->atlas = NULL; + pos = i; + } else + priv = NULL; + break; + } + if (priv == NULL) { + int count = glyph_size_to_count(size); + pos = cache->evict & glyph_count_to_mask(count); + for (s = 0; s < count; s++) { + priv = cache->glyphs[pos + s]; + if (priv != NULL) { + priv->atlas =NULL; + cache->glyphs[pos + s] = NULL; + } + } + } + + /* And pick a new eviction position */ + cache->evict = rand() % GLYPH_CACHE_SIZE; + } + assert(cache->glyphs[pos] == NULL); + + priv = glyph_get_private(glyph); + cache->glyphs[pos] = priv; + priv->atlas = cache->picture; + priv->size = size; + priv->pos = pos << 1 | (PICT_FORMAT_RGB(glyph_picture->format) != 0); + s = pos / ((GLYPH_MAX_SIZE / GLYPH_MIN_SIZE) * (GLYPH_MAX_SIZE / GLYPH_MIN_SIZE)); + priv->coordinate.x = s % (CACHE_PICTURE_SIZE / GLYPH_MAX_SIZE) * GLYPH_MAX_SIZE; + priv->coordinate.y = (s / (CACHE_PICTURE_SIZE / GLYPH_MAX_SIZE)) * GLYPH_MAX_SIZE; + for (s = GLYPH_MIN_SIZE; s < GLYPH_MAX_SIZE; s *= 2) { + if (pos & 1) + priv->coordinate.x += s; + if (pos & 2) + priv->coordinate.y += s; + pos >>= 2; + } + + glyph_cache_upload(screen, cache, glyph, + priv->coordinate.x, priv->coordinate.y); + + return TRUE; +} + +static void apply_damage(struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + BoxRec box; + + if (op->damage == NULL) + return; + + box.x1 = r->dst.x + op->dst.x; + box.y1 = r->dst.y + op->dst.y; + box.x2 = box.x1 + r->width; + box.y2 = box.y1 + r->height; + + assert_pixmap_contains_box(op->dst.pixmap, &box); + sna_damage_add_box(op->damage, &box); +} + +#define GET_PRIVATE(g) ((struct sna_glyph *)((char *)(g)->devPrivates + priv_offset)) +static Bool +glyphs_to_dst(struct sna *sna, + CARD8 op, + PicturePtr src, + PicturePtr dst, + INT16 src_x, INT16 src_y, + int 
nlist, GlyphListPtr list, GlyphPtr *glyphs) +{ + struct sna_composite_op tmp; + ScreenPtr screen = dst->pDrawable->pScreen; + const int priv_offset = sna_glyph_key.offset; + int index = screen->myNum; + PicturePtr glyph_atlas; + BoxPtr rects; + int nrect; + int16_t x, y; + + memset(&tmp, 0, sizeof(tmp)); + + DBG(("%s(op=%d, src=(%d, %d), nlist=%d, dst=(%d, %d)+(%d, %d))\n", + __FUNCTION__, op, src_x, src_y, nlist, + list->xOff, list->yOff, dst->pDrawable->x, dst->pDrawable->y)); + + rects = REGION_RECTS(dst->pCompositeClip); + nrect = REGION_NUM_RECTS(dst->pCompositeClip); + + x = dst->pDrawable->x; + y = dst->pDrawable->y; + src_x -= list->xOff + x; + src_y -= list->yOff + y; + + glyph_atlas = NULL; + while (nlist--) { + int n = list->len; + x += list->xOff; + y += list->yOff; + while (n--) { + GlyphPtr glyph = *glyphs++; + struct sna_glyph priv; + int i; + + if (glyph->info.width == 0 || glyph->info.height == 0) + goto next_glyph; + + priv = *GET_PRIVATE(glyph); + if (priv.atlas == NULL) { + if (glyph_atlas) { + tmp.done(sna, &tmp); + glyph_atlas = NULL; + } + if (!glyph_cache(screen, &sna->render, glyph)) { + /* no cache for this glyph */ + priv.atlas = GlyphPicture(glyph)[index]; + priv.coordinate.x = priv.coordinate.y = 0; + } else + priv = *GET_PRIVATE(glyph); + } + + if (priv.atlas != glyph_atlas) { + if (glyph_atlas) + tmp.done(sna, &tmp); + + if (!sna->render.composite(sna, + op, src, priv.atlas, dst, + 0, 0, 0, 0, 0, 0, + 0, 0, + &tmp)) + return FALSE; + + glyph_atlas = priv.atlas; + } + + for (i = 0; i < nrect; i++) { + struct sna_composite_rectangles r; + int16_t dx, dy; + int16_t x2, y2; + + r.dst.x = x - glyph->info.x; + r.dst.y = y - glyph->info.y; + x2 = r.dst.x + glyph->info.width; + y2 = r.dst.y + glyph->info.height; + dx = dy = 0; + + DBG(("%s: glyph=(%d, %d), (%d, %d), clip=(%d, %d), (%d, %d)\n", + __FUNCTION__, + r.dst.x, r.dst.y, x2, y2, + rects[i].x1, rects[i].y1, + rects[i].x2, rects[i].y2)); + if (rects[i].y1 >= y2) + break; + + if (r.dst.x 
< rects[i].x1) + dx = rects[i].x1 - r.dst.x, r.dst.x = rects[i].x1; + if (x2 > rects[i].x2) + x2 = rects[i].x2; + if (r.dst.y < rects[i].y1) + dy = rects[i].y1 - r.dst.y, r.dst.y = rects[i].y1; + if (y2 > rects[i].y2) + y2 = rects[i].y2; + + if (r.dst.x < x2 && r.dst.y < y2) { + DBG(("%s: blt=(%d, %d), (%d, %d)\n", + __FUNCTION__, r.dst.x, r.dst.y, x2, y2)); + + r.src.x = r.dst.x + src_x; + r.src.y = r.dst.y + src_y; + r.mask.x = dx + priv.coordinate.x; + r.mask.y = dy + priv.coordinate.y; + r.width = x2 - r.dst.x; + r.height = y2 - r.dst.y; + tmp.blt(sna, &tmp, &r); + apply_damage(&tmp, &r); + } + } + +next_glyph: + x += glyph->info.xOff; + y += glyph->info.yOff; + } + list++; + } + if (glyph_atlas) + tmp.done(sna, &tmp); + + return TRUE; +} + +static Bool +glyphs_to_dst_slow(struct sna *sna, + CARD8 op, + PicturePtr src, + PicturePtr dst, + INT16 src_x, INT16 src_y, + int nlist, GlyphListPtr list, GlyphPtr *glyphs) +{ + struct sna_composite_op tmp; + ScreenPtr screen = dst->pDrawable->pScreen; + const int priv_offset = sna_glyph_key.offset; + int index = screen->myNum; + int x, y, n; + + memset(&tmp, 0, sizeof(tmp)); + + DBG(("%s(op=%d, src=(%d, %d), nlist=%d, dst=(%d, %d)+(%d, %d))\n", + __FUNCTION__, op, src_x, src_y, nlist, + list->xOff, list->yOff, dst->pDrawable->x, dst->pDrawable->y)); + + x = dst->pDrawable->x; + y = dst->pDrawable->y; + src_x -= list->xOff + x; + src_y -= list->yOff + y; + + while (nlist--) { + x += list->xOff; + y += list->yOff; + n = list->len; + while (n--) { + GlyphPtr glyph = *glyphs++; + struct sna_glyph priv; + BoxPtr rects; + int nrect; + + if (glyph->info.width == 0 || glyph->info.height == 0) + goto next_glyph; + + priv = *GET_PRIVATE(glyph); + if (priv.atlas == NULL) { + if (!glyph_cache(screen, &sna->render, glyph)) { + /* no cache for this glyph */ + priv.atlas = GlyphPicture(glyph)[index]; + priv.coordinate.x = priv.coordinate.y = 0; + } else + priv = *GET_PRIVATE(glyph); + } + + if (!sna->render.composite(sna, + op, src, 
priv.atlas, dst, + src_x + x - glyph->info.x, + src_y + y - glyph->info.y, + priv.coordinate.x, priv.coordinate.y, + x - glyph->info.x, + y - glyph->info.y, + glyph->info.width, + glyph->info.height, + &tmp)) + return FALSE; + + rects = REGION_RECTS(dst->pCompositeClip); + nrect = REGION_NUM_RECTS(dst->pCompositeClip); + do { + struct sna_composite_rectangles r; + int16_t dx, dy; + int16_t x2, y2; + + r.dst.x = x - glyph->info.x; + r.dst.y = y - glyph->info.y; + x2 = r.dst.x + glyph->info.width; + y2 = r.dst.y + glyph->info.height; + dx = dy = 0; + + DBG(("%s: glyph=(%d, %d), (%d, %d), clip=(%d, %d), (%d, %d)\n", + __FUNCTION__, + r.dst.x, r.dst.y, x2, y2, + rects->x1, rects->y1, + rects->x2, rects->y2)); + if (rects->y1 >= y2) + break; + + if (r.dst.x < rects->x1) + dx = rects->x1 - r.dst.x, r.dst.x = rects->x1; + if (x2 > rects->x2) + x2 = rects->x2; + if (r.dst.y < rects->y1) + dy = rects->y1 - r.dst.y, r.dst.y = rects->y1; + if (y2 > rects->y2) + y2 = rects->y2; + + if (r.dst.x < x2 && r.dst.y < y2) { + DBG(("%s: blt=(%d, %d), (%d, %d)\n", + __FUNCTION__, r.dst.x, r.dst.y, x2, y2)); + + r.src.x = r.dst.x + src_x; + r.src.y = r.dst.y + src_y; + r.mask.x = dx + priv.coordinate.x; + r.mask.y = dy + priv.coordinate.y; + r.width = x2 - r.dst.x; + r.height = y2 - r.dst.y; + tmp.blt(sna, &tmp, &r); + apply_damage(&tmp, &r); + } + rects++; + } while (--nrect); + tmp.done(sna, &tmp); + +next_glyph: + x += glyph->info.xOff; + y += glyph->info.yOff; + } + list++; + } + + return TRUE; +} + +static Bool +clear_pixmap(struct sna *sna, PixmapPtr pixmap, PictFormat format) +{ + BoxRec box; + xRenderColor color = { 0 }; + + box.x1 = box.y1 = 0; + box.x2 = pixmap->drawable.width; + box.y2 = pixmap->drawable.height; + + return sna->render.fill_boxes(sna, PictOpClear, format, &color, + pixmap, sna_pixmap_get_bo(pixmap), + &box, 1); +} + +static Bool +glyphs_via_mask(struct sna *sna, + CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr format, + INT16 src_x, INT16 src_y, 
+ int nlist, GlyphListPtr list, GlyphPtr *glyphs) +{ + ScreenPtr screen = dst->pDrawable->pScreen; + struct sna_composite_op tmp; + const int priv_offset = sna_glyph_key.offset; + int index = screen->myNum; + CARD32 component_alpha; + PixmapPtr pixmap; + PicturePtr glyph_atlas, mask; + int16_t x, y, width, height; + int n, error; + BoxRec box; + + DBG(("%s(op=%d, src=(%d, %d), nlist=%d, dst=(%d, %d)+(%d, %d))\n", + __FUNCTION__, op, src_x, src_y, nlist, + list->xOff, list->yOff, dst->pDrawable->x, dst->pDrawable->y)); + + glyph_extents(nlist, list, glyphs, &box); + if (box.x2 <= box.x1 || box.y2 <= box.y1) + return TRUE; + + DBG(("%s: bounds=((%d, %d), (%d, %d))\n", __FUNCTION__, + box.x1, box.y1, box.x2, box.y2)); + + if (!sna_compute_composite_extents(&box, + src, NULL, dst, + src_x, src_y, + 0, 0, + box.x1, box.y1, + box.x2 - box.x1, + box.y2 - box.y1)) + return TRUE; + + DBG(("%s: extents=((%d, %d), (%d, %d))\n", __FUNCTION__, + box.x1, box.y1, box.x2, box.y2)); + + memset(&tmp, 0, sizeof(tmp)); + + width = box.x2 - box.x1; + height = box.y2 - box.y1; + box.x1 -= dst->pDrawable->x; + box.y1 -= dst->pDrawable->y; + x = -box.x1; + y = -box.y1; + src_x += box.x1 - list->xOff; + src_y += box.y1 - list->yOff; + + if (format->depth == 1) { + PictFormatPtr a8Format = + PictureMatchFormat(screen, 8, PICT_a8); + if (!a8Format) + return FALSE; + + format = a8Format; + } + + pixmap = screen->CreatePixmap(screen, + width, height, format->depth, + CREATE_PIXMAP_USAGE_SCRATCH); + if (!pixmap) + return FALSE; + + component_alpha = NeedsComponent(format->format); + mask = CreatePicture(0, &pixmap->drawable, + format, CPComponentAlpha, + &component_alpha, serverClient, &error); + screen->DestroyPixmap(pixmap); + if (!mask) + return FALSE; + + ValidatePicture(mask); + + if (!clear_pixmap(sna, pixmap, mask->format)) { + FreePicture(mask, 0); + return FALSE; + } + + glyph_atlas = NULL; + do { + x += list->xOff; + y += list->yOff; + n = list->len; + while (n--) { + GlyphPtr glyph = 
*glyphs++; + struct sna_glyph *priv; + PicturePtr this_atlas; + struct sna_composite_rectangles r; + + if (glyph->info.width == 0 || glyph->info.height == 0) + goto next_glyph; + + priv = GET_PRIVATE(glyph); + if (priv->atlas != NULL) { + this_atlas = priv->atlas; + r.src = priv->coordinate; + } else { + if (glyph_atlas) { + tmp.done(sna, &tmp); + glyph_atlas = NULL; + } + if (glyph_cache(screen, &sna->render, glyph)) { + this_atlas = priv->atlas; + r.src = priv->coordinate; + } else { + /* no cache for this glyph */ + this_atlas = GlyphPicture(glyph)[index]; + r.src.x = r.src.y = 0; + } + } + + if (this_atlas != glyph_atlas) { + if (glyph_atlas) + tmp.done(sna, &tmp); + + if (!sna->render.composite(sna, PictOpAdd, + this_atlas, NULL, mask, + 0, 0, 0, 0, 0, 0, + 0, 0, + &tmp)) { + FreePicture(mask, 0); + return FALSE; + } + + glyph_atlas = this_atlas; + } + + DBG(("%s: blt glyph origin (%d, %d), offset (%d, %d), src (%d, %d), size (%d, %d)\n", + __FUNCTION__, + x, y, + glyph->info.x, glyph->info.y, + r.src.x, r.src.y, + glyph->info.width, glyph->info.height)); + + r.dst.x = x - glyph->info.x; + r.dst.y = y - glyph->info.y; + r.width = glyph->info.width; + r.height = glyph->info.height; + tmp.blt(sna, &tmp, &r); + +next_glyph: + x += glyph->info.xOff; + y += glyph->info.yOff; + } + list++; + } while (--nlist); + if (glyph_atlas) + tmp.done(sna, &tmp); + + sna_composite(op, + src, mask, dst, + src_x, src_y, + 0, 0, + box.x1, box.y1, + width, height); + + FreePicture(mask, 0); + return TRUE; +} + +Bool sna_glyphs_init(ScreenPtr screen) +{ + if (!dixRegisterPrivateKey(&sna_glyph_key, + PRIVATE_GLYPH, + sizeof(struct sna_glyph))) + return FALSE; + + return TRUE; +} + +Bool sna_glyphs_create(struct sna *sna) +{ + return realize_glyph_caches(sna); +} + +static PictFormatPtr +glyphs_format(int nlist, GlyphListPtr list, GlyphPtr * glyphs) +{ + PictFormatPtr format = list[0].format; + int16_t x1, x2, y1, y2; + int16_t x, y; + BoxRec extents; + Bool first = TRUE; + + x = 0; + 
y = 0; + extents.x1 = 0; + extents.y1 = 0; + extents.x2 = 0; + extents.y2 = 0; + while (nlist--) { + int n = list->len; + + if (format->format != list->format->format) + return NULL; + + x += list->xOff; + y += list->yOff; + list++; + while (n--) { + GlyphPtr glyph = *glyphs++; + + if (glyph->info.width == 0 || glyph->info.height == 0) { + x += glyph->info.xOff; + y += glyph->info.yOff; + continue; + } + + x1 = x - glyph->info.x; + if (x1 < MINSHORT) + x1 = MINSHORT; + y1 = y - glyph->info.y; + if (y1 < MINSHORT) + y1 = MINSHORT; + x2 = x1 + glyph->info.width; + if (x2 > MAXSHORT) + x2 = MAXSHORT; + y2 = y1 + glyph->info.height; + if (y2 > MAXSHORT) + y2 = MAXSHORT; + + if (first) { + extents.x1 = x1; + extents.y1 = y1; + extents.x2 = x2; + extents.y2 = y2; + first = FALSE; + } else { + /* Potential overlap */ + if (x1 < extents.x2 && x2 > extents.x1 && + y1 < extents.y2 && y2 > extents.y1) + return NULL; + + if (x1 < extents.x1) + extents.x1 = x1; + if (x2 > extents.x2) + extents.x2 = x2; + if (y1 < extents.y1) + extents.y1 = y1; + if (y2 > extents.y2) + extents.y2 = y2; + } + x += glyph->info.xOff; + y += glyph->info.yOff; + } + } + + return format; +} + +static void +glyphs_fallback(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr mask_format, + int src_x, + int src_y, + int nlist, + GlyphListPtr list, + GlyphPtr *glyphs) +{ + int screen = dst->pDrawable->pScreen->myNum; + pixman_image_t *dst_image, *mask_image, *src_image; + int dx, dy, x, y; + BoxRec box; + RegionRec region; + + glyph_extents(nlist, list, glyphs, &box); + if (box.x2 <= box.x1 || box.y2 <= box.y1) + return; + + DBG(("%s: (%d, %d), (%d, %d)\n", + __FUNCTION__, box.x1, box.y1, box.x2, box.y2)); + + RegionInit(&region, &box, 1); + RegionTranslate(&region, dst->pDrawable->x, dst->pDrawable->y); + if (dst->pCompositeClip) + RegionIntersect(&region, &region, dst->pCompositeClip); + DBG(("%s: clipped extents (%d, %d), (%d, %d)\n", + __FUNCTION__, + RegionExtents(&region)->x1, RegionExtents(&region)->y1, + 
RegionExtents(&region)->x2, RegionExtents(&region)->y2)); + if (!RegionNotEmpty(&region)) + return; + + sna_drawable_move_region_to_cpu(dst->pDrawable, &region, + true); + if (src->pDrawable) + sna_drawable_move_to_cpu(src->pDrawable, false); + + dst_image = image_from_pict(dst, TRUE, &x, &y); + DBG(("%s: dst offset (%d, %d)\n", __FUNCTION__, x, y)); + box.x1 += x; + box.x2 += x; + box.y1 += y; + box.y2 += y; + + src_image = image_from_pict(src, FALSE, &dx, &dy); + DBG(("%s: src offset (%d, %d)\n", __FUNCTION__, dx, dy)); + src_x += dx - list->xOff - x; + src_y += dy - list->yOff - y; + + if (mask_format) { + mask_image = + pixman_image_create_bits(mask_format->depth << 24 | mask_format->format, + box.x2 - box.x1, box.y2 - box.y1, + NULL, 0); + if (NeedsComponent(mask_format->format)) + pixman_image_set_component_alpha(mask_image, TRUE); + + x -= box.x1; + y -= box.y1; + } else + mask_image = dst_image; + + do { + int n = list->len; + x += list->xOff; + y += list->yOff; + while (n--) { + GlyphPtr g = *glyphs++; + PicturePtr picture; + pixman_image_t *glyph_image; + int dx, dy; + + if (g->info.width == 0 || g->info.height == 0) + goto next_glyph; + + picture = GlyphPicture(g)[screen]; + if (picture == NULL) + goto next_glyph; + + glyph_image = image_from_pict(picture, FALSE, &dx, &dy); + if (!glyph_image) + goto next_glyph; + + if (mask_format) { + DBG(("%s: glyph+(%d,%d) to mask (%d, %d)x(%d, %d)\n", + __FUNCTION__, + dx,dy, + x - g->info.x, + y - g->info.y, + g->info.width, + g->info.height)); + + pixman_image_composite(PictOpAdd, + glyph_image, + NULL, + mask_image, + dx, dy, + 0, 0, + x - g->info.x, + y - g->info.y, + g->info.width, + g->info.height); + } else { + DBG(("%s: glyph+(%d, %d) to dst (%d, %d)x(%d, %d)\n", + __FUNCTION__, + dx, dy, + x - g->info.x, + y - g->info.y, + g->info.width, + g->info.height)); + + pixman_image_composite(op, + src_image, + glyph_image, + dst_image, + src_x + (x - g->info.x), + src_y + (y - g->info.y), + dx, dy, + x - g->info.x, + y - 
g->info.y, + g->info.width, + g->info.height); + } + free_pixman_pict(picture,glyph_image); + +next_glyph: + x += g->info.xOff; + y += g->info.yOff; + } + list++; + } while (--nlist); + + if (mask_format) { + DBG(("%s: glyph mask composite src=(%d,%d) dst=(%d, %d)x(%d, %d)\n", + __FUNCTION__, + src_x + box.x1, + src_y + box.y1, + box.x1, box.y1, + box.x2-box.x1, box.y2-box.y1)); + pixman_image_composite(op, src_image, mask_image, dst_image, + src_x + box.x1, + src_y + box.y1, + 0, 0, + box.x1, box.y1, + box.x2 - box.x1, + box.y2 - box.y1); + pixman_image_unref(mask_image); + } + + free_pixman_pict(src, src_image); + free_pixman_pict(dst, dst_image); + RegionUninit(&region); +} + +void +sna_glyphs(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr mask, + INT16 src_x, INT16 src_y, + int nlist, GlyphListPtr list, GlyphPtr *glyphs) +{ + struct sna *sna = to_sna_from_drawable(dst->pDrawable); + PictFormatPtr _mask; + + DBG(("%s(op=%d, nlist=%d, src=(%d, %d))\n", + __FUNCTION__, op, nlist, src_x, src_y)); + + if (REGION_NUM_RECTS(dst->pCompositeClip) == 0) + return; + + if (sna->kgem.wedged || !sna->have_render) { + DBG(("%s: no render (wedged=%d)\n", + __FUNCTION__, sna->kgem.wedged)); + goto fallback; + } + + if (too_small(sna, dst->pDrawable) && !picture_is_gpu(src)) { + DBG(("%s: fallback -- too small\n", __FUNCTION__)); + goto fallback; + } + + if (dst->alphaMap || src->alphaMap) { + DBG(("%s: fallback -- alpha maps\n", __FUNCTION__)); + goto fallback; + } + + /* XXX discard the mask for non-overlapping glyphs? 
*/ + + if (!mask) { + if (glyphs_to_dst(sna, op, + src, dst, + src_x, src_y, + nlist, list, glyphs)) + return; + } + + _mask = mask; + if (!_mask) + _mask = glyphs_format(nlist, list, glyphs); + if (_mask) { + if (glyphs_via_mask(sna, op, + src, dst, _mask, + src_x, src_y, + nlist, list, glyphs)) + return; + } else { + if (glyphs_to_dst_slow(sna, op, + src, dst, + src_x, src_y, + nlist, list, glyphs)) + return; + } + +fallback: + glyphs_fallback(op, src, dst, mask, src_x, src_y, nlist, list, glyphs); +} + +void +sna_glyph_unrealize(ScreenPtr screen, GlyphPtr glyph) +{ + struct sna_glyph_cache *cache; + struct sna_glyph *priv; + struct sna *sna; + + priv = glyph_get_private(glyph); + if (priv->atlas == NULL) + return; + + sna = to_sna_from_screen(screen); + cache = &sna->render.glyph[priv->pos & 1]; + assert(cache->glyphs[priv->pos >> 1] == priv); + cache->glyphs[priv->pos >> 1] = NULL; + priv->atlas = NULL; +} + +void sna_glyphs_close(struct sna *sna) +{ + unrealize_glyph_caches(sna); +} diff --git a/src/sna/sna_gradient.c b/src/sna/sna_gradient.c new file mode 100644 index 00000000..5cfc81aa --- /dev/null +++ b/src/sna/sna_gradient.c @@ -0,0 +1,335 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_render.h" + +#if DEBUG_GRADIENT +#undef DBG +#define DBG(x) ErrorF x +#endif + +#define xFixedToDouble(f) pixman_fixed_to_double(f) + +static int +sna_gradient_sample_width(PictGradient *gradient) +{ + unsigned int n; + int width; + + width = 2; + for (n = 1; n < gradient->nstops; n++) { + xFixed dx = gradient->stops[n].x - gradient->stops[n-1].x; + uint16_t delta, max; + int ramp; + + if (dx == 0) + return 0; + + max = gradient->stops[n].color.red - + gradient->stops[n-1].color.red; + + delta = gradient->stops[n].color.green - + gradient->stops[n-1].color.green; + if (delta > max) + max = delta; + + delta = gradient->stops[n].color.blue - + gradient->stops[n-1].color.blue; + if (delta > max) + max = delta; + + delta = gradient->stops[n].color.alpha - + gradient->stops[n-1].color.alpha; + if (delta > max) + max = delta; + + ramp = 128 * max / xFixedToDouble(dx); + if (ramp > width) + width = ramp; + } + + width *= gradient->nstops-1; + width = (width + 7) & -8; + return min(width, 1024); +} + +static Bool +_gradient_color_stops_equal(PictGradient *pattern, + struct sna_gradient_cache *cache) +{ + if (cache->nstops != pattern->nstops) + return FALSE; + + return memcmp(cache->stops, + pattern->stops, + sizeof(PictGradientStop)*cache->nstops) == 0; +} + +struct kgem_bo * +sna_render_get_gradient(struct sna *sna, + PictGradient 
*pattern) +{ + struct sna_render *render = &sna->render; + struct sna_gradient_cache *cache; + pixman_image_t *gradient, *image; + pixman_point_fixed_t p1, p2; + unsigned int i, width; + struct kgem_bo *bo; + + DBG(("%s: %dx[%f:%x...%f:%x...%f:%x]\n", __FUNCTION__, + pattern->nstops, + pattern->stops[0].x / 65536., + pattern->stops[0].color.alpha >> 8 << 24 | + pattern->stops[0].color.red >> 8 << 16 | + pattern->stops[0].color.green >> 8 << 8 | + pattern->stops[0].color.blue >> 8 << 0, + pattern->stops[pattern->nstops/2].x / 65536., + pattern->stops[pattern->nstops/2].color.alpha >> 8 << 24 | + pattern->stops[pattern->nstops/2].color.red >> 8 << 16 | + pattern->stops[pattern->nstops/2].color.green >> 8 << 8 | + pattern->stops[pattern->nstops/2].color.blue >> 8 << 0, + pattern->stops[pattern->nstops-1].x / 65536., + pattern->stops[pattern->nstops-1].color.alpha >> 8 << 24 | + pattern->stops[pattern->nstops-1].color.red >> 8 << 16 | + pattern->stops[pattern->nstops-1].color.green >> 8 << 8 | + pattern->stops[pattern->nstops-1].color.blue >> 8 << 0)); + + for (i = 0; i < render->gradient_cache.size; i++) { + cache = &render->gradient_cache.cache[i]; + if (_gradient_color_stops_equal(pattern, cache)) { + DBG(("%s: old --> %d\n", __FUNCTION__, i)); + return kgem_bo_reference(cache->bo); + } + } + + width = sna_gradient_sample_width(pattern); + DBG(("%s: sample width = %d\n", __FUNCTION__, width)); + if (width == 0) + return NULL; + + p1.x = 0; + p1.y = 0; + p2.x = width << 16; + p2.y = 0; + + gradient = pixman_image_create_linear_gradient(&p1, &p2, + (pixman_gradient_stop_t *)pattern->stops, + pattern->nstops); + if (gradient == NULL) + return NULL; + + pixman_image_set_filter(gradient, PIXMAN_FILTER_BILINEAR, NULL, 0); + pixman_image_set_repeat(gradient, PIXMAN_REPEAT_PAD); + + image = pixman_image_create_bits(PIXMAN_a8r8g8b8, width, 1, NULL, 0); + if (image == NULL) { + pixman_image_unref(gradient); + return NULL; + } + + pixman_image_composite(PIXMAN_OP_SRC, + 
gradient, NULL, image, + 0, 0, + 0, 0, + 0, 0, + width, 1); + pixman_image_unref(gradient); + + DBG(("%s: [0]=%x, [%d]=%x [%d]=%x\n", __FUNCTION__, + pixman_image_get_data(image)[0], + width/2, pixman_image_get_data(image)[width/2], + width-1, pixman_image_get_data(image)[width-1])); + + bo = kgem_create_linear(&sna->kgem, width*4); + if (!bo) { + pixman_image_unref(image); + return NULL; + } + + bo->pitch = 4*width; + kgem_bo_write(&sna->kgem, bo, pixman_image_get_data(image), 4*width); + + pixman_image_unref(image); + + if (render->gradient_cache.size < GRADIENT_CACHE_SIZE) + i = render->gradient_cache.size++; + else + i = rand () % GRADIENT_CACHE_SIZE; + + cache = &render->gradient_cache.cache[i]; + if (cache->nstops < pattern->nstops) { + PictGradientStop *newstops; + + newstops = malloc(sizeof(PictGradientStop) * pattern->nstops); + if (newstops == NULL) + return bo; + + free(cache->stops); + cache->stops = newstops; + } + + memcpy(cache->stops, pattern->stops, + sizeof(PictGradientStop) * pattern->nstops); + cache->nstops = pattern->nstops; + + if (cache->bo) + kgem_bo_destroy(&sna->kgem, cache->bo); + cache->bo = kgem_bo_reference(bo); + + return bo; +} + +void +sna_render_flush_solid(struct sna *sna) +{ + struct sna_solid_cache *cache = &sna->render.solid_cache; + + DBG(("sna_render_flush_solid(size=%d)\n", cache->size)); + + kgem_bo_write(&sna->kgem, cache->cache_bo, + cache->color, cache->size*sizeof(uint32_t)); + cache->dirty = 0; +} + +static void +sna_render_finish_solid(struct sna *sna, bool force) +{ + struct sna_solid_cache *cache = &sna->render.solid_cache; + int i; + + DBG(("sna_render_finish_solid(force=%d, busy=%d, dirty=%d)\n", + force, cache->cache_bo->gpu, cache->dirty)); + + if (!force && !cache->cache_bo->gpu) + return; + + if (cache->dirty) + sna_render_flush_solid(sna); + + for (i = 0; i < cache->size; i++) + kgem_bo_destroy(&sna->kgem, cache->bo[i]); + kgem_bo_destroy(&sna->kgem, cache->cache_bo); + + DBG(("sna_render_finish_solid 
reset\n")); + + cache->cache_bo = kgem_create_linear(&sna->kgem, sizeof(cache->color)); + cache->bo[0] = kgem_create_proxy(cache->cache_bo, 0, sizeof(uint32_t)); + cache->bo[0]->pitch = 4; + cache->size = 1; +} + +struct kgem_bo * +sna_render_get_solid(struct sna *sna, uint32_t color) +{ + struct sna_solid_cache *cache = &sna->render.solid_cache; + unsigned int i; + + if (color == 0) { + DBG(("%s(clear)\n", __FUNCTION__)); + return kgem_bo_reference(cache->bo[0]); + } + + if (cache->color[cache->last] == color) { + DBG(("sna_render_get_solid(%d) = %x (last)\n", + cache->last, color)); + return kgem_bo_reference(cache->bo[cache->last]); + } + + for (i = 1; i < cache->size; i++) { + if (cache->color[i] == color) { + DBG(("sna_render_get_solid(%d) = %x (old)\n", + i, color)); + goto done; + } + } + + sna_render_finish_solid(sna, i == ARRAY_SIZE(cache->color)); + + i = cache->size++; + cache->color[i] = color; + cache->bo[i] = kgem_create_proxy(cache->cache_bo, + i*sizeof(uint32_t), sizeof(uint32_t)); + cache->bo[i]->pitch = 4; + cache->dirty = 1; + DBG(("sna_render_get_solid(%d) = %x (new)\n", i, color)); + +done: + cache->last = i; + return kgem_bo_reference(cache->bo[i]); +} + +static Bool sna_solid_cache_init(struct sna *sna) +{ + struct sna_solid_cache *cache = &sna->render.solid_cache; + + cache->cache_bo = + kgem_create_linear(&sna->kgem, sizeof(cache->color)); + if (!cache->cache_bo) + return FALSE; + + cache->bo[0] = kgem_create_proxy(cache->cache_bo, 0, sizeof(uint32_t)); + cache->bo[0]->pitch = 4; + cache->size = 1; + cache->last = 0; + return TRUE; +} + +Bool sna_gradients_create(struct sna *sna) +{ + return sna_solid_cache_init(sna); +} + +void sna_gradients_close(struct sna *sna) +{ + int i; + + if (sna->render.solid_cache.cache_bo) + kgem_bo_destroy(&sna->kgem, sna->render.solid_cache.cache_bo); + for (i = 0; i < sna->render.solid_cache.size; i++) + kgem_bo_destroy(&sna->kgem, sna->render.solid_cache.bo[i]); + sna->render.solid_cache.cache_bo = 0; + 
sna->render.solid_cache.size = 0; + sna->render.solid_cache.dirty = 0; + + for (i = 0; i < sna->render.gradient_cache.size; i++) { + struct sna_gradient_cache *cache = + &sna->render.gradient_cache.cache[i]; + + if (cache->bo) + kgem_bo_destroy(&sna->kgem, cache->bo); + + free(cache->stops); + cache->stops = NULL; + cache->nstops = 0; + } + sna->render.gradient_cache.size = 0; +} diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c new file mode 100644 index 00000000..41f36713 --- /dev/null +++ b/src/sna/sna_io.c @@ -0,0 +1,452 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_render.h" +#include "sna_reg.h" + +#include <sys/mman.h> + +#if DEBUG_IO +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +#define PITCH(x, y) ALIGN((x)*(y), 4) + +static void read_boxes_inplace(struct kgem *kgem, + struct kgem_bo *bo, int16_t src_dx, int16_t src_dy, + PixmapPtr pixmap, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n) +{ + int bpp = pixmap->drawable.bitsPerPixel; + void *src, *dst = pixmap->devPrivate.ptr; + int src_pitch = bo->pitch; + int dst_pitch = pixmap->devKind; + + DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling)); + + kgem_bo_submit(kgem, bo); + + src = kgem_bo_map(kgem, bo, PROT_READ); + if (src == NULL) + return; + + do { + memcpy_blt(src, dst, bpp, + src_pitch, dst_pitch, + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + box->x2 - box->x1, box->y2 - box->y1); + box++; + } while (--n); + + munmap(src, bo->size); +} + +void sna_read_boxes(struct sna *sna, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int nbox) +{ + struct kgem *kgem = &sna->kgem; + struct kgem_bo *dst_bo; + int tmp_nbox; + const BoxRec *tmp_box; + char *src; + void *ptr; + int src_pitch, cpp, offset; + int n, cmd, br13; + + DBG(("%s x %d, src=(handle=%d, offset=(%d,%d)), dst=(size=(%d, %d), offset=(%d,%d))\n", + __FUNCTION__, nbox, src_bo->handle, src_dx, src_dy, + dst->drawable.width, dst->drawable.height, dst_dx, dst_dy)); + + if (DEBUG_NO_IO || kgem->wedged || + !kgem_bo_is_busy(kgem, src_bo) || + src_bo->tiling == I915_TILING_Y) { + read_boxes_inplace(kgem, + src_bo, src_dx, src_dy, + dst, dst_dx, dst_dy, + box, nbox); + return; + } + + /* count the total number of bytes to be read and allocate a bo */ + cpp = dst->drawable.bitsPerPixel / 8; + offset = 0; + for 
(n = 0; n < nbox; n++) { + int height = box[n].y2 - box[n].y1; + int width = box[n].x2 - box[n].x1; + offset += PITCH(width, cpp) * height; + } + + DBG((" read buffer size=%d\n", offset)); + + dst_bo = kgem_create_buffer(kgem, offset, KGEM_BUFFER_LAST, &ptr); + if (!dst_bo) { + read_boxes_inplace(kgem, + src_bo, src_dx, src_dy, + dst, dst_dx, dst_dy, + box, nbox); + return; + } + + cmd = XY_SRC_COPY_BLT_CMD; + if (cpp == 4) + cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + src_pitch = src_bo->pitch; + if (kgem->gen >= 40 && src_bo->tiling) { + cmd |= BLT_SRC_TILED; + src_pitch >>= 2; + } + + br13 = 0xcc << 16; + switch (cpp) { + default: + case 4: br13 |= 1 << 25; /* RGB8888 */ + case 2: br13 |= 1 << 24; /* RGB565 */ + case 1: break; + } + + kgem_set_mode(kgem, KGEM_BLT); + if (kgem->nexec + 2 > KGEM_EXEC_SIZE(kgem) || + kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem) || + !kgem_check_batch(kgem, 8) || + !kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL)) + _kgem_submit(kgem); + + tmp_nbox = nbox; + tmp_box = box; + offset = 0; + do { + int nbox_this_time; + + nbox_this_time = tmp_nbox; + if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; + assert(nbox_this_time); + tmp_nbox -= nbox_this_time; + + for (n = 0; n < nbox_this_time; n++) { + int height = tmp_box[n].y2 - tmp_box[n].y1; + int width = tmp_box[n].x2 - tmp_box[n].x1; + int pitch = PITCH(width, cpp); + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG((" blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n", + offset, + tmp_box[n].x1 + src_dx, + tmp_box[n].y1 + src_dy, + width, height, pitch)); + + assert(tmp_box[n].x1 + src_dx >= 0); + assert((tmp_box[n].x2 + src_dx) * dst->drawable.bitsPerPixel/8 <= src_bo->pitch); + assert(tmp_box[n].y1 + src_dy >= 0); + assert((tmp_box[n].y2 + src_dy) * src_bo->pitch 
<= src_bo->size); + + b[0] = cmd; + b[1] = br13 | pitch; + b[2] = 0; + b[3] = height << 16 | width; + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + offset); + b[5] = (tmp_box[n].y1 + src_dy) << 16 | (tmp_box[n].x1 + src_dx); + b[6] = src_pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8; + + offset += pitch * height; + } + tmp_box += nbox_this_time; + + _kgem_submit(kgem); + } while (tmp_nbox); + assert(offset == dst_bo->size); + + kgem_buffer_sync(kgem, dst_bo); + + src = ptr; + do { + int height = box->y2 - box->y1; + int width = box->x2 - box->x1; + int pitch = PITCH(width, cpp); + + DBG((" copy offset %lx [%08x...%08x]: (%d, %d) x (%d, %d), src pitch=%d, dst pitch=%d, bpp=%d\n", + (long)((char *)src - (char *)ptr), + *(uint32_t*)src, *(uint32_t*)(src+pitch*height - 4), + box->x1 + dst_dx, + box->y1 + dst_dy, + width, height, + pitch, dst->devKind, cpp*8)); + + assert(box->x1 + dst_dx >= 0); + assert(box->x2 + dst_dx <= dst->drawable.width); + assert(box->y1 + dst_dy >= 0); + assert(box->y2 + dst_dy <= dst->drawable.height); + + memcpy_blt(src, dst->devPrivate.ptr, cpp*8, + pitch, dst->devKind, + 0, 0, + box->x1 + dst_dx, box->y1 + dst_dy, + width, height); + box++; + + src += pitch * height; + } while (--nbox); + assert(src - (char *)ptr == dst_bo->size); + kgem_bo_destroy(kgem, dst_bo); +} + +static void write_boxes_inplace(struct kgem *kgem, + const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy, + struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n) +{ + int dst_pitch = bo->pitch; + int src_pitch = stride; + void *dst; + + DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling)); + + kgem_bo_submit(kgem, bo); + + dst = kgem_bo_map(kgem, bo, PROT_READ | PROT_WRITE); + if (dst == NULL) + return; + + do { + DBG(("%s: (%d, %d) -> (%d, 
%d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__, + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + box->x2 - box->x1, box->y2 - box->y1, + bpp, src_pitch, dst_pitch)); + + memcpy_blt(src, dst, bpp, + src_pitch, dst_pitch, + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + box->x2 - box->x1, box->y2 - box->y1); + box++; + } while (--n); + + munmap(dst, bo->size); +} + +void sna_write_boxes(struct sna *sna, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy, + const BoxRec *box, int nbox) +{ + struct kgem *kgem = &sna->kgem; + struct kgem_bo *src_bo; + void *ptr; + int offset; + int n, cmd, br13; + + DBG(("%s x %d\n", __FUNCTION__, nbox)); + + if (DEBUG_NO_IO || kgem->wedged || + !kgem_bo_is_busy(kgem, dst_bo) || + dst_bo->tiling == I915_TILING_Y) { + write_boxes_inplace(kgem, + src, stride, bpp, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + box, nbox); + return; + } + + cmd = XY_SRC_COPY_BLT_CMD; + if (bpp == 32) + cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + br13 = dst_bo->pitch; + if (kgem->gen >= 40 && dst_bo->tiling) { + cmd |= BLT_DST_TILED; + br13 >>= 2; + } + br13 |= 0xcc << 16; + switch (bpp) { + default: + case 32: br13 |= 1 << 25; /* RGB8888 */ + case 16: br13 |= 1 << 24; /* RGB565 */ + case 8: break; + } + + kgem_set_mode(kgem, KGEM_BLT); + if (kgem->nexec + 2 > KGEM_EXEC_SIZE(kgem) || + kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem) || + !kgem_check_batch(kgem, 8) || + !kgem_check_bo_fenced(kgem, dst_bo, NULL)) + _kgem_submit(kgem); + + do { + int nbox_this_time; + + nbox_this_time = nbox; + if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; + assert(nbox_this_time); + nbox -= 
nbox_this_time; + + /* Count the total number of bytes to be read and allocate a + * single buffer large enough. Or if it is very small, combine + * with other allocations. */ + offset = 0; + for (n = 0; n < nbox_this_time; n++) { + int height = box[n].y2 - box[n].y1; + int width = box[n].x2 - box[n].x1; + offset += PITCH(width, bpp >> 3) * height; + } + + src_bo = kgem_create_buffer(kgem, offset, + KGEM_BUFFER_WRITE | KGEM_BUFFER_LAST, + &ptr); + if (!src_bo) + break; + + offset = 0; + do { + int height = box->y2 - box->y1; + int width = box->x2 - box->x1; + int pitch = PITCH(width, bpp >> 3); + uint32_t *b; + + DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n", + __FUNCTION__, + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + width, height, + offset, pitch)); + + assert(box->x1 + src_dx >= 0); + assert((box->x2 + src_dx)*bpp <= 8*stride); + assert(box->y1 + src_dy >= 0); + + assert(box->x1 + dst_dx >= 0); + assert(box->y1 + dst_dy >= 0); + + memcpy_blt(src, (char *)ptr + offset, bpp, + stride, pitch, + box->x1 + src_dx, box->y1 + src_dy, + 0, 0, + width, height); + + b = kgem->batch + kgem->nbatch; + b[0] = cmd; + b[1] = br13; + b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); + b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = 0; + b[6] = pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + offset); + kgem->nbatch += 8; + + box++; + offset += pitch * height; + } while (--nbox_this_time); + assert(offset == src_bo->size); + + if (nbox) + _kgem_submit(kgem); + + kgem_bo_destroy(kgem, src_bo); + } while (nbox); + + _kgem_set_mode(kgem, KGEM_BLT); +} + +struct kgem_bo *sna_replace(struct sna *sna, + struct kgem_bo *bo, + int width, int height, int bpp, + const void *src, int 
stride) +{ + struct kgem *kgem = &sna->kgem; + void *dst; + + DBG(("%s(%d, %d)\n", __FUNCTION__, width, height)); + + assert(bo->reusable); + if (kgem_bo_is_busy(kgem, bo)) { + struct kgem_bo *new_bo; + int tiling = bo->tiling; + + /* As we use fences for GPU BLTs, we often have + * lots of contention upon the limited number of fences. + */ + if (sna->kgem.gen < 40) + tiling = I915_TILING_NONE; + + new_bo = kgem_create_2d(kgem, + width, height, bpp, tiling, + CREATE_INACTIVE); + if (new_bo) { + kgem_bo_destroy(kgem, bo); + bo = new_bo; + } + } + + if (bo->tiling == I915_TILING_NONE && bo->pitch == stride) { + kgem_bo_write(kgem, bo, src, (height-1)*stride + width*bpp/8); + return bo; + } + + dst = kgem_bo_map(kgem, bo, PROT_READ | PROT_WRITE); + if (dst) { + memcpy_blt(src, dst, bpp, + stride, bo->pitch, + 0, 0, + 0, 0, + width, height); + munmap(dst, bo->size); + } + + return bo; +} diff --git a/src/sna/sna_module.h b/src/sna/sna_module.h new file mode 100644 index 00000000..9b14acc3 --- /dev/null +++ b/src/sna/sna_module.h @@ -0,0 +1,3 @@ +const OptionInfoRec *sna_available_options(int chipid, int busid); +void sna_init_scrn(ScrnInfoPtr scrn); + diff --git a/src/sna/sna_reg.h b/src/sna/sna_reg.h new file mode 100644 index 00000000..f6e53979 --- /dev/null +++ b/src/sna/sna_reg.h @@ -0,0 +1,108 @@ +#ifndef SNA_REG_H +#define SNA_REG_H + +/* Flush */ +#define MI_FLUSH (0x04<<23) +#define MI_FLUSH_DW (0x26<<23) + +#define MI_WRITE_DIRTY_STATE (1<<4) +#define MI_END_SCENE (1<<3) +#define MI_GLOBAL_SNAPSHOT_COUNT_RESET (1<<3) +#define MI_INHIBIT_RENDER_CACHE_FLUSH (1<<2) +#define MI_STATE_INSTRUCTION_CACHE_FLUSH (1<<1) +#define MI_INVALIDATE_MAP_CACHE (1<<0) +/* broadwater flush bits */ +#define BRW_MI_GLOBAL_SNAPSHOT_RESET (1 << 3) + +#define MI_BATCH_BUFFER_END (0xA << 23) + +/* Noop */ +#define MI_NOOP 0x00 +#define MI_NOOP_WRITE_ID (1<<22) +#define MI_NOOP_ID_MASK (1<<22 - 1) + +/* Wait for Events */ +#define MI_WAIT_FOR_EVENT (0x03<<23) +#define 
MI_WAIT_FOR_PIPEB_SVBLANK (1<<18) +#define MI_WAIT_FOR_PIPEA_SVBLANK (1<<17) +#define MI_WAIT_FOR_OVERLAY_FLIP (1<<16) +#define MI_WAIT_FOR_PIPEB_VBLANK (1<<7) +#define MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW (1<<5) +#define MI_WAIT_FOR_PIPEA_VBLANK (1<<3) +#define MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW (1<<1) + +/* Set the scan line for MI_WAIT_FOR_PIPE?_SCAN_LINE_WINDOW */ +#define MI_LOAD_SCAN_LINES_INCL (0x12<<23) +#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEA (0) +#define MI_LOAD_SCAN_LINES_DISPLAY_PIPEB (0x1<<20) + +/* BLT commands */ +#define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3)) +#define COLOR_BLT_WRITE_ALPHA (1<<21) +#define COLOR_BLT_WRITE_RGB (1<<20) + +#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|(0x4)) +#define XY_COLOR_BLT_WRITE_ALPHA (1<<21) +#define XY_COLOR_BLT_WRITE_RGB (1<<20) +#define XY_COLOR_BLT_TILED (1<<11) + +#define XY_SETUP_CLIP_BLT_CMD ((2<<29)|(3<<22)|1) + +#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +#define XY_SRC_COPY_BLT_WRITE_ALPHA (1<<21) +#define XY_SRC_COPY_BLT_WRITE_RGB (1<<20) +#define XY_SRC_COPY_BLT_SRC_TILED (1<<15) +#define XY_SRC_COPY_BLT_DST_TILED (1<<11) + +#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|0x4) +#define SRC_COPY_BLT_WRITE_ALPHA (1<<21) +#define SRC_COPY_BLT_WRITE_RGB (1<<20) + +#define XY_PAT_BLT_IMMEDIATE ((2<<29)|(0x72<<22)) + +#define XY_MONO_PAT_BLT_CMD ((0x2<<29)|(0x52<<22)|0x7) +#define XY_MONO_PAT_VERT_SEED ((1<<10)|(1<<9)|(1<<8)) +#define XY_MONO_PAT_HORT_SEED ((1<<14)|(1<<13)|(1<<12)) +#define XY_MONO_PAT_BLT_WRITE_ALPHA (1<<21) +#define XY_MONO_PAT_BLT_WRITE_RGB (1<<20) + +#define XY_MONO_SRC_BLT_CMD ((0x2<<29)|(0x54<<22)|(0x6)) +#define XY_MONO_SRC_BLT_WRITE_ALPHA (1<<21) +#define XY_MONO_SRC_BLT_WRITE_RGB (1<<20) + +/* BLT commands */ +#define BLT_WRITE_ALPHA (1<<21) +#define BLT_WRITE_RGB (1<<20) +#define BLT_SRC_TILED (1<<15) +#define BLT_DST_TILED (1<<11) + +#define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3)) +#define XY_COLOR_BLT_CMD ((2<<29)|(0x50<<22)|(0x4)) +#define XY_SETUP_CLIP_BLT_CMD 
((2<<29)|(3<<22)|1) +#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) +#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|0x4) +#define XY_PAT_BLT_IMMEDIATE ((2<<29)|(0x72<<22)) +#define XY_MONO_PAT_BLT_CMD ((0x2<<29)|(0x52<<22)|0x7) +#define XY_MONO_SRC_BLT_CMD ((0x2<<29)|(0x54<<22)|(0x6)) + +/* FLUSH commands */ +#define BRW_3D(Pipeline,Opcode,Subopcode) \ + ((3 << 29) | \ + ((Pipeline) << 27) | \ + ((Opcode) << 24) | \ + ((Subopcode) << 16)) +#define PIPE_CONTROL BRW_3D(3, 2, 0) +#define PIPE_CONTROL_NOWRITE (0 << 14) +#define PIPE_CONTROL_WRITE_QWORD (1 << 14) +#define PIPE_CONTROL_WRITE_DEPTH (2 << 14) +#define PIPE_CONTROL_WRITE_TIME (3 << 14) +#define PIPE_CONTROL_DEPTH_STALL (1 << 13) +#define PIPE_CONTROL_WC_FLUSH (1 << 12) +#define PIPE_CONTROL_IS_FLUSH (1 << 11) +#define PIPE_CONTROL_TC_FLUSH (1 << 10) +#define PIPE_CONTROL_NOTIFY_ENABLE (1 << 8) +#define PIPE_CONTROL_GLOBAL_GTT (1 << 2) +#define PIPE_CONTROL_LOCAL_PGTT (0 << 2) +#define PIPE_CONTROL_DEPTH_CACHE_FLUSH (1 << 0) + +#endif diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c new file mode 100644 index 00000000..b6a44d26 --- /dev/null +++ b/src/sna/sna_render.c @@ -0,0 +1,888 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#include "sna.h" +#include "sna_render.h" + +#include <fb.h> + +#if DEBUG_RENDER +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +#define NO_REDIRECT 0 +#define NO_CONVERT 0 +#define NO_FIXUP 0 +#define NO_EXTRACT 0 + +void sna_kgem_reset(struct kgem *kgem) +{ + struct sna *sna = container_of(kgem, struct sna, kgem); + + sna->render.reset(sna); +} + +void sna_kgem_flush(struct kgem *kgem) +{ + struct sna *sna = container_of(kgem, struct sna, kgem); + + sna->render.flush(sna); + + if (sna->render.solid_cache.dirty) + sna_render_flush_solid(sna); +} + +void sna_kgem_context_switch(struct kgem *kgem, int new_mode) +{ + struct sna *sna = container_of(kgem, struct sna, kgem); + + sna->render.context_switch(sna, new_mode); +} + +CARD32 +sna_format_for_depth(int depth) +{ + switch (depth) { + case 1: return PICT_a1; + case 4: return PICT_a4; + case 8: return PICT_a8; + case 15: return PICT_x1r5g5b5; + case 16: return PICT_r5g6b5; + default: + case 24: return PICT_x8r8g8b8; + case 30: return PICT_x2r10g10b10; + case 32: return PICT_a8r8g8b8; + } +} + +static Bool +no_render_composite(struct sna *sna, + uint8_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t mask_x, int16_t mask_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp) +{ + DBG(("%s ()\n", __FUNCTION__)); + + if (mask == NULL && + 
sna_blt_composite(sna, + op, src, dst, + src_x, src_y, + dst_x, dst_y, + width, height, + tmp)) + return TRUE; + + return FALSE; +} + +static Bool +no_render_copy_boxes(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n) +{ + DBG(("%s (n=%d)\n", __FUNCTION__, n)); + + return sna_blt_copy_boxes(sna, alu, + src_bo, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + dst->drawable.bitsPerPixel, + box, n); +} + +static Bool +no_render_copy(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + struct sna_copy_op *tmp) +{ + DBG(("%s ()\n", __FUNCTION__)); + + if (src->drawable.bitsPerPixel != dst->drawable.bitsPerPixel && + sna_blt_copy(sna, alu, + src_bo, dst_bo, dst->drawable.bitsPerPixel, + tmp)) + return TRUE; + + return FALSE; +} + +static Bool +no_render_fill_boxes(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n) +{ + uint8_t alu = GXcopy; + uint32_t pixel; + + DBG(("%s (op=%d, color=(%04x,%04x,%04x, %04x))\n", + __FUNCTION__, op, + color->red, color->green, color->blue, color->alpha)); + + if (color == 0) + op = PictOpClear; + + if (op == PictOpClear) { + alu = GXclear; + op = PictOpSrc; + } + + if (op == PictOpOver) { + if ((color->alpha >= 0xff00)) + op = PictOpSrc; + } + + if (op != PictOpSrc) + return FALSE; + + if (!sna_get_pixel_from_rgba(&pixel, + color->red, + color->green, + color->blue, + color->alpha, + format)) + return FALSE; + + return sna_blt_fill_boxes(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + pixel, box, n); +} + +static Bool +no_render_fill(struct sna *sna, uint8_t alu, + PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + struct sna_fill_op *tmp) +{ + DBG(("%s (alu=%d, color=%08x)\n", __FUNCTION__, alu, color)); + return 
sna_blt_fill(sna, alu, + dst_bo, dst->drawable.bitsPerPixel, + color, + tmp); +} + +static void no_render_reset(struct sna *sna) +{ +} + +static void no_render_flush(struct sna *sna) +{ +} + +static void +no_render_context_switch(struct sna *sna, + int new_mode) +{ +} + +static void +no_render_fini(struct sna *sna) +{ +} + +void no_render_init(struct sna *sna) +{ + struct sna_render *render = &sna->render; + + render->composite = no_render_composite; + + render->copy_boxes = no_render_copy_boxes; + render->copy = no_render_copy; + + render->fill_boxes = no_render_fill_boxes; + render->fill = no_render_fill; + + render->reset = no_render_reset; + render->flush = no_render_flush; + render->context_switch = no_render_context_switch; + render->fini = no_render_fini; + + if (sna->kgem.gen >= 60) + sna->kgem.ring = KGEM_BLT; +} + +static Bool +move_to_gpu(PixmapPtr pixmap, const BoxPtr box) +{ + struct sna_pixmap *priv; + int count, w, h; + + if (pixmap->usage_hint) + return FALSE; + + w = box->x2 - box->x1; + h = box->y2 - box->y1; + if (w == pixmap->drawable.width || h == pixmap->drawable.height) + return TRUE; + + count = SOURCE_BIAS; + priv = sna_pixmap(pixmap); + if (priv) + count = ++priv->source_count; + + DBG(("%s: migrate box (%d, %d), (%d, %d)? 
source count=%d, fraction=%d/%d [%d]\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, + count, w*h, + pixmap->drawable.width * pixmap->drawable.height, + pixmap->drawable.width * pixmap->drawable.height / (w*h))); + + return count*w*h >= pixmap->drawable.width * pixmap->drawable.height; +} + +static Bool +texture_is_cpu(PixmapPtr pixmap, const BoxPtr box) +{ + struct sna_pixmap *priv = sna_pixmap(pixmap); + + if (priv == NULL) + return TRUE; + + if (priv->gpu_only) + return FALSE; + + if (priv->gpu_bo == NULL) + return TRUE; + + if (!priv->cpu_damage) + return FALSE; + + if (sna_damage_contains_box(priv->gpu_damage, box) != PIXMAN_REGION_OUT) + return FALSE; + + return sna_damage_contains_box(priv->cpu_damage, box) != PIXMAN_REGION_OUT; +} + +static struct kgem_bo *upload(struct sna *sna, + struct sna_composite_channel *channel, + PixmapPtr pixmap, + int16_t x, int16_t y, int16_t w, int16_t h, + BoxPtr box) +{ + struct kgem_bo *bo; + + DBG(("%s: origin=(%d, %d), box=(%d, %d), (%d, %d), pixmap=%dx%d\n", + __FUNCTION__, x, y, box->x1, box->y1, box->x2, box->y2, pixmap->drawable.width, pixmap->drawable.height)); + + bo = kgem_upload_source_image(&sna->kgem, + pixmap->devPrivate.ptr, + box->x1, box->y1, w, h, + pixmap->devKind, + pixmap->drawable.bitsPerPixel); + if (bo) { + channel->offset[0] -= box->x1; + channel->offset[1] -= box->y1; + channel->scale[0] = 1./w; + channel->scale[1] = 1./h; + channel->width = w; + channel->height = h; + } + + return bo; +} + +int +sna_render_pixmap_bo(struct sna *sna, + struct sna_composite_channel *channel, + PixmapPtr pixmap, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y) +{ + struct kgem_bo *bo = NULL; + struct sna_pixmap *priv; + BoxRec box; + + DBG(("%s (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w,h)); + + /* XXX handle transformed repeat */ + if (w == 0 || h == 0 || channel->transform) { + box.x1 = box.y1 = 0; + box.x2 = pixmap->drawable.width; + box.y2 = pixmap->drawable.height; + } 
else { + box.x1 = x; + box.y1 = y; + box.x2 = x + w; + box.y2 = y + h; + + if (channel->repeat != RepeatNone) { + if (box.x1 < 0 || + box.y1 < 0 || + box.x2 > pixmap->drawable.width || + box.y2 > pixmap->drawable.height) { + box.x1 = box.y1 = 0; + box.x2 = pixmap->drawable.width; + box.y2 = pixmap->drawable.height; + } + } else { + if (box.x1 < 0) + box.x1 = 0; + if (box.y1 < 0) + box.y1 = 0; + if (box.x2 > pixmap->drawable.width) + box.x2 = pixmap->drawable.width; + if (box.y2 > pixmap->drawable.height) + box.y2 = pixmap->drawable.height; + } + } + + w = box.x2 - box.x1; + h = box.y2 - box.y1; + DBG(("%s box=(%d, %d), (%d, %d): (%d, %d)/(%d, %d)\n", __FUNCTION__, + box.x1, box.y1, box.x2, box.y2, w, h, + pixmap->drawable.width, pixmap->drawable.height)); + if (w <= 0 || h <= 0) { + DBG(("%s: sample extents outside of texture -> clear\n", + __FUNCTION__)); + return 0; + } + + channel->height = pixmap->drawable.height; + channel->width = pixmap->drawable.width; + channel->scale[0] = 1. / pixmap->drawable.width; + channel->scale[1] = 1. / pixmap->drawable.height; + channel->offset[0] = x - dst_x; + channel->offset[1] = y - dst_y; + + DBG(("%s: offset=(%d, %d), size=(%d, %d)\n", + __FUNCTION__, + channel->offset[0], channel->offset[1], + pixmap->drawable.width, pixmap->drawable.height)); + + if (texture_is_cpu(pixmap, &box) && !move_to_gpu(pixmap, &box)) { + /* If we are using transient data, it is better to copy + * to an amalgamated upload buffer so that we don't + * stall on releasing the cpu bo immediately upon + * completion of the operation. 
+ */ + if (pixmap->usage_hint != CREATE_PIXMAP_USAGE_SCRATCH_HEADER && + w * pixmap->drawable.bitsPerPixel * h > 8*4096) { + priv = sna_pixmap_attach(pixmap); + bo = pixmap_vmap(&sna->kgem, pixmap); + if (bo) + bo = kgem_bo_reference(bo); + } + + if (bo == NULL) { + DBG(("%s: uploading CPU box\n", __FUNCTION__)); + bo = upload(sna, channel, pixmap, x,y, w,h, &box); + } + } + + if (bo == NULL) { + priv = sna_pixmap_force_to_gpu(pixmap); + if (priv) + bo = kgem_bo_reference(priv->gpu_bo); + else + bo = upload(sna, channel, pixmap, x,y, w,h, &box); + } + + channel->bo = bo; + return bo != NULL; +} + +int +sna_render_picture_extract(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y) +{ + struct kgem_bo *bo = NULL; + PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable); + int16_t ox, oy; + BoxRec box; + +#if NO_EXTRACT + return -1; +#endif + + DBG(("%s (%d, %d)x(%d, %d) [dst=(%d, %d)]\n", + __FUNCTION__, x, y, w, h, dst_x, dst_y)); + + if (w == 0 || h == 0) { + DBG(("%s: fallback -- unknown bounds\n", __FUNCTION__)); + return -1; + } + + ox = box.x1 = x; + oy = box.y1 = y; + box.x2 = x + w; + box.y2 = y + h; + if (channel->transform) { + pixman_vector_t v; + + pixman_transform_bounds(channel->transform, &box); + + v.vector[0] = ox << 16; + v.vector[1] = oy << 16; + v.vector[2] = 1 << 16; + pixman_transform_point(channel->transform, &v); + ox = v.vector[0] / v.vector[2]; + oy = v.vector[1] / v.vector[2]; + } + + if (channel->repeat != RepeatNone) { + if (box.x1 < 0 || + box.y1 < 0 || + box.x2 > pixmap->drawable.width || + box.y2 > pixmap->drawable.height) { + /* XXX tiled repeats? 
*/ + box.x1 = box.y1 = 0; + box.x2 = pixmap->drawable.width; + box.y2 = pixmap->drawable.height; + + if (!channel->is_affine) { + DBG(("%s: fallback -- repeating project transform too large for texture\n", + __FUNCTION__)); + return -1; + } + } + } else { + if (box.x1 < 0) + box.x1 = 0; + if (box.y1 < 0) + box.y1 = 0; + if (box.x2 > pixmap->drawable.width) + box.x2 = pixmap->drawable.width; + if (box.y2 > pixmap->drawable.height) + box.y2 = pixmap->drawable.height; + } + + w = box.x2 - box.x1; + h = box.y2 - box.y1; + DBG(("%s box=(%d, %d), (%d, %d): (%d, %d)/(%d, %d)\n", __FUNCTION__, + box.x1, box.y1, box.x2, box.y2, w, h, + pixmap->drawable.width, pixmap->drawable.height)); + if (w <= 0 || h <= 0) { + DBG(("%s: sample extents outside of texture -> clear\n", + __FUNCTION__)); + return 0; + } + + if (w > sna->render.max_3d_size || h > sna->render.max_3d_size) { + DBG(("%s: fallback -- sample too large for texture (%d, %d)x(%d, %d)\n", + __FUNCTION__, box.x1, box.y1, w, h)); + return -1; + } + + if (texture_is_cpu(pixmap, &box) && !move_to_gpu(pixmap, &box)) { + bo = kgem_upload_source_image(&sna->kgem, + pixmap->devPrivate.ptr, + box.x1, box.y1, w, h, + pixmap->devKind, + pixmap->drawable.bitsPerPixel); + if (!bo) { + DBG(("%s: failed to upload source image, using clear\n", + __FUNCTION__)); + return 0; + } + } else { + if (!sna_pixmap_move_to_gpu(pixmap)) { + DBG(("%s: falback -- pixmap is not on the GPU\n", + __FUNCTION__)); + return -1; + } + + bo = kgem_create_2d(&sna->kgem, w, h, + pixmap->drawable.bitsPerPixel, + kgem_choose_tiling(&sna->kgem, + I915_TILING_X, w, h, + pixmap->drawable.bitsPerPixel), + 0); + if (!bo) { + DBG(("%s: failed to create bo, using clear\n", + __FUNCTION__)); + return 0; + } + + if (!sna_blt_copy_boxes(sna, GXcopy, + sna_pixmap_get_bo(pixmap), 0, 0, + bo, -box.x1, -box.y1, + pixmap->drawable.bitsPerPixel, + &box, 1)) { + DBG(("%s: fallback -- unable to copy boxes\n", + __FUNCTION__)); + return -1; + } + } + + if (ox == x && oy == y) 
{ + x = y = 0; + } else if (channel->transform) { + pixman_vector_t v; + pixman_transform_t m; + + v.vector[0] = (ox - box.x1) << 16; + v.vector[1] = (oy - box.y1) << 16; + v.vector[2] = 1 << 16; + pixman_transform_invert(&m, channel->transform); + pixman_transform_point(&m, &v); + x = v.vector[0] / v.vector[2]; + y = v.vector[1] / v.vector[2]; + } else { + x = ox - box.x1; + y = oy - box.y1; + } + + channel->offset[0] = x - dst_x; + channel->offset[1] = y - dst_y; + channel->scale[0] = 1./w; + channel->scale[1] = 1./h; + channel->width = w; + channel->height = h; + channel->bo = bo; + return 1; +} + +int +sna_render_picture_fixup(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y) +{ + pixman_image_t *dst, *src; + uint32_t pitch; + int dx, dy; + void *ptr; + +#if NO_FIXUP + return -1; +#endif + + DBG(("%s: (%d, %d)x(%d, %d)\n", __FUNCTION__, x, y, w, h)); + + if (w == 0 || h == 0) { + DBG(("%s: fallback - unknown bounds\n", __FUNCTION__)); + return -1; + } + if (w > sna->render.max_3d_size || h > sna->render.max_3d_size) { + DBG(("%s: fallback - too large (%dx%d)\n", __FUNCTION__, w, h)); + return -1; + } + + if (PICT_FORMAT_RGB(picture->format) == 0) { + pitch = ALIGN(w, 4); + channel->pict_format = PIXMAN_a8; + } else { + pitch = sizeof(uint32_t)*w; + channel->pict_format = PIXMAN_a8r8g8b8; + } + if (channel->pict_format != picture->format) { + DBG(("%s: converting to %08x (pitch=%d) from %08x\n", + __FUNCTION__, channel->pict_format, pitch, picture->format)); + } + + channel->bo = kgem_create_buffer(&sna->kgem, + pitch*h, KGEM_BUFFER_WRITE, + &ptr); + if (!channel->bo) { + DBG(("%s: failed to create upload buffer, using clear\n", + __FUNCTION__)); + return 0; + } + + /* XXX Convolution filter? 
*/ + memset(ptr, 0, pitch*h); + channel->bo->pitch = pitch; + + /* Composite in the original format to preserve idiosyncracies */ + if (picture->format == channel->pict_format) + dst = pixman_image_create_bits(picture->format, w, h, ptr, pitch); + else + dst = pixman_image_create_bits(picture->format, w, h, NULL, 0); + if (!dst) { + kgem_bo_destroy(&sna->kgem, channel->bo); + return 0; + } + + if (picture->pDrawable) + sna_drawable_move_to_cpu(picture->pDrawable, false); + + src = image_from_pict(picture, FALSE, &dx, &dy); + if (src == NULL) { + pixman_image_unref(dst); + kgem_bo_destroy(&sna->kgem, channel->bo); + return 0; + } + + DBG(("%s: compositing tmp=(%d+%d, %d+%d)x(%d, %d)\n", + __FUNCTION__, x, dx, y, dy, w, h)); + pixman_image_composite(PictOpSrc, src, NULL, dst, + x + dx, y + dy, + 0, 0, + 0, 0, + w, h); + free_pixman_pict(picture, src); + + /* Then convert to card format */ + if (picture->format != channel->pict_format) { + DBG(("%s: performing post-conversion %08x->%08x (%d, %d)\n", + __FUNCTION__, + picture->format, channel->pict_format, + w, h)); + + src = dst; + dst = pixman_image_create_bits(channel->pict_format, + w, h, ptr, pitch); + + pixman_image_composite(PictOpSrc, src, NULL, dst, + 0, 0, + 0, 0, + 0, 0, + w, h); + pixman_image_unref(src); + } + pixman_image_unref(dst); + + channel->width = w; + channel->height = h; + + channel->filter = PictFilterNearest; + channel->repeat = RepeatNone; + channel->is_affine = TRUE; + + channel->scale[0] = 1./w; + channel->scale[1] = 1./h; + channel->offset[0] = -dst_x; + channel->offset[1] = -dst_y; + channel->transform = NULL; + + return 1; +} + +int +sna_render_picture_convert(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + PixmapPtr pixmap, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y) +{ + uint32_t pitch; + pixman_image_t *src, *dst; + BoxRec box; + void *ptr; + +#if NO_CONVERT + return -1; +#endif + + box.x1 = x; + box.y1 = y; + box.x2 
= x + w; + box.y2 = y + h; + + if (channel->transform) { + DBG(("%s: has transform, uploading whole surface\n", + __FUNCTION__)); + box.x1 = box.y1 = 0; + box.x2 = pixmap->drawable.width; + box.y2 = pixmap->drawable.height; + } + + if (box.x1 < 0) + box.x1 = 0; + if (box.y1 < 0) + box.y1 = 0; + if (box.x2 > pixmap->drawable.width) + box.x2 = pixmap->drawable.width; + if (box.y2 > pixmap->drawable.height) + box.y2 = pixmap->drawable.height; + + w = box.x2 - box.x1; + h = box.y2 - box.y1; + + DBG(("%s: convert (%d, %d)x(%d, %d), source size %dx%d\n", + __FUNCTION__, box.x1, box.y1, w, h, + pixmap->drawable.width, + pixmap->drawable.height)); + + sna_pixmap_move_to_cpu(pixmap, false); + + src = pixman_image_create_bits(picture->format, + pixmap->drawable.width, + pixmap->drawable.height, + pixmap->devPrivate.ptr, + pixmap->devKind); + if (!src) + return 0; + + if (PICT_FORMAT_RGB(picture->format) == 0) { + pitch = ALIGN(w, 4); + channel->pict_format = PIXMAN_a8; + DBG(("%s: converting to a8 (pitch=%d) from %08x\n", + __FUNCTION__, pitch, picture->format)); + } else { + pitch = sizeof(uint32_t)*w; + channel->pict_format = PIXMAN_a8r8g8b8; + DBG(("%s: converting to a8r8g8b8 (pitch=%d) from %08x\n", + __FUNCTION__, pitch, picture->format)); + } + + channel->bo = kgem_create_buffer(&sna->kgem, + pitch*h, KGEM_BUFFER_WRITE, + &ptr); + if (!channel->bo) { + pixman_image_unref(src); + return 0; + } + + channel->bo->pitch = pitch; + dst = pixman_image_create_bits(channel->pict_format, w, h, ptr, pitch); + if (!dst) { + kgem_bo_destroy(&sna->kgem, channel->bo); + pixman_image_unref(src); + return 0; + } + + pixman_image_composite(PictOpSrc, src, NULL, dst, + box.x1, box.y1, + 0, 0, + 0, 0, + w, h); + pixman_image_unref(dst); + pixman_image_unref(src); + + channel->width = w; + channel->height = h; + + channel->scale[0] = 1. / w; + channel->scale[1] = 1. 
/ h; + channel->offset[0] = x - dst_x - box.x1; + channel->offset[1] = y - dst_y - box.y1; + + DBG(("%s: offset=(%d, %d), size=(%d, %d) ptr[0]=%08x\n", + __FUNCTION__, + channel->offset[0], channel->offset[1], + channel->width, channel->height, + *(uint32_t*)ptr)); + return 1; +} + +Bool +sna_render_composite_redirect(struct sna *sna, + struct sna_composite_op *op, + int x, int y, int width, int height) +{ + struct sna_composite_redirect *t = &op->redirect; + int bpp = op->dst.pixmap->drawable.bitsPerPixel; + struct sna_pixmap *priv; + struct kgem_bo *bo; + +#if NO_REDIRECT + return FALSE; +#endif + + DBG(("%s: target too large (%dx%d), copying to temporary %dx%d\n", + __FUNCTION__, op->dst.width, op->dst.height, width,height)); + + if (!width || !height) + return FALSE; + + priv = sna_pixmap(op->dst.pixmap); + if (priv->gpu_bo == NULL) { + DBG(("%s: fallback -- no GPU bo attached\n", __FUNCTION__)); + return FALSE; + } + + if (!sna_pixmap_move_to_gpu(op->dst.pixmap)) + return FALSE; + + /* We can process the operation in a single pass, + * but the target is too large for the 3D pipeline. + * Copy into a smaller surface and replace afterwards. 
+ */ + bo = kgem_create_2d(&sna->kgem, + width, height, bpp, + kgem_choose_tiling(&sna->kgem, I915_TILING_X, + width, height, bpp), + 0); + if (!bo) + return FALSE; + + t->box.x1 = x + op->dst.x; + t->box.y1 = y + op->dst.y; + t->box.x2 = t->box.x1 + width; + t->box.y2 = t->box.y1 + height; + + DBG(("%s: original box (%d, %d), (%d, %d)\n", + __FUNCTION__, t->box.x1, t->box.y1, t->box.x2, t->box.y2)); + + if (!sna_blt_copy_boxes(sna, GXcopy, + op->dst.bo, 0, 0, + bo, -t->box.x1, -t->box.y1, + bpp, &t->box, 1)) { + kgem_bo_destroy(&sna->kgem, bo); + return FALSE; + } + + t->real_bo = priv->gpu_bo; + op->dst.bo = bo; + op->dst.x = -x; + op->dst.y = -y; + op->dst.width = width; + op->dst.height = height; + op->damage = &priv->gpu_damage; + return TRUE; +} + +void +sna_render_composite_redirect_done(struct sna *sna, + const struct sna_composite_op *op) +{ + const struct sna_composite_redirect *t = &op->redirect; + + if (t->real_bo) { + DBG(("%s: copying temporary to dst\n", __FUNCTION__)); + + sna_blt_copy_boxes(sna, GXcopy, + op->dst.bo, -t->box.x1, -t->box.y1, + t->real_bo, 0, 0, + op->dst.pixmap->drawable.bitsPerPixel, + &t->box, 1); + + kgem_bo_destroy(&sna->kgem, op->dst.bo); + } +} diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h new file mode 100644 index 00000000..328eaf78 --- /dev/null +++ b/src/sna/sna_render.h @@ -0,0 +1,511 @@ +#ifndef SNA_RENDER_H +#define SNA_RENDER_H + +#define GRADIENT_CACHE_SIZE 16 + +#define fastcall __attribute__((regparm(3))) + +struct sna; +struct sna_glyph; +struct sna_video; +struct sna_video_frame; + +struct sna_composite_rectangles { + struct sna_coordinate { + int16_t x, y; + } src, mask, dst; + int16_t width, height; +}; + +struct sna_composite_op { + fastcall void (*blt)(struct sna *sna, const struct sna_composite_op *op, + const struct sna_composite_rectangles *r); + void (*boxes)(struct sna *sna, const struct sna_composite_op *op, + const BoxRec *box, int nbox); + void (*done)(struct sna *sna, const struct 
sna_composite_op *op); + + struct sna_damage **damage; + + uint32_t op; + + struct { + PixmapPtr pixmap; + CARD32 format; + struct kgem_bo *bo; + int16_t x, y; + uint16_t width, height; + } dst; + + struct sna_composite_channel { + struct kgem_bo *bo; + PictTransform *transform; + uint16_t width; + uint16_t height; + uint32_t pict_format; + uint32_t card_format; + uint32_t filter; + uint32_t repeat; + uint32_t is_affine : 1; + uint32_t is_solid : 1; + uint32_t is_opaque : 1; + uint32_t alpha_fixup : 1; + uint32_t rb_reversed : 1; + int16_t offset[2]; + float scale[2]; + + struct gen3_shader_channel { + int type; + uint32_t mode; + uint32_t constants; + } gen3; + } src, mask; + uint32_t is_affine : 1; + uint32_t has_component_alpha : 1; + uint32_t need_magic_ca_pass : 1; + uint32_t rb_reversed : 1; + + int floats_per_vertex; + fastcall void (*prim_emit)(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r); + + struct sna_composite_redirect { + struct kgem_bo *real_bo; + BoxRec box; + } redirect; + + union { + struct sna_blt_state { + PixmapPtr src_pixmap; + int16_t sx, sy; + + uint32_t inplace :1; + uint32_t overwrites:1; + + int hdr; + uint32_t cmd; + uint32_t br13; + uint32_t pitch[2]; + uint32_t pixel; + struct kgem_bo *bo[3]; + } blt; + + struct { + int nothing; + } gen2; + + struct { + float constants[8]; + uint32_t num_constants; + } gen3; + + struct { + int wm_kernel; + int ve_id; + } gen4; + + struct { + int wm_kernel; + int ve_id; + } gen5; + + struct { + int wm_kernel; + int nr_surfaces; + int nr_inputs; + int ve_id; + } gen6; + + void *priv; + } u; +}; + +struct sna_composite_spans_op { + struct sna_composite_op base; + + void (*boxes)(struct sna *sna, const struct sna_composite_spans_op *op, + const BoxRec *box, int nbox, + float opacity); + void (*done)(struct sna *sna, const struct sna_composite_spans_op *op); + + void (*prim_emit)(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec 
*box, + float opacity); +}; + +struct sna_fill_op { + struct sna_composite_op base; + + void (*blt)(struct sna *sna, const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h); + void (*done)(struct sna *sna, const struct sna_fill_op *op); +}; + +struct sna_copy_op { + struct sna_composite_op base; + + void (*blt)(struct sna *sna, const struct sna_copy_op *op, + int16_t sx, int16_t sy, + int16_t w, int16_t h, + int16_t dx, int16_t dy); + void (*done)(struct sna *sna, const struct sna_copy_op *op); +}; + +struct sna_render { + int max_3d_size; + + Bool (*composite)(struct sna *sna, uint8_t op, + PicturePtr dst, PicturePtr src, PicturePtr mask, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t w, int16_t h, + struct sna_composite_op *tmp); + + Bool (*composite_spans)(struct sna *sna, uint8_t op, + PicturePtr dst, PicturePtr src, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t w, int16_t h, + struct sna_composite_spans_op *tmp); + + Bool (*video)(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + short src_w, short src_h, + short drw_w, short drw_h, + PixmapPtr pixmap); + + Bool (*fill_boxes)(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n); + Bool (*fill)(struct sna *sna, uint8_t alu, + PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + struct sna_fill_op *tmp); + + Bool (*copy_boxes)(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n); + Bool (*copy)(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + struct sna_copy_op *op); + + void (*flush)(struct sna *sna); + void (*reset)(struct sna *sna); + void 
(*context_switch)(struct sna *sna, int new_mode); + void (*fini)(struct sna *sna); + + struct sna_solid_cache { + struct kgem_bo *cache_bo; + uint32_t color[1024]; + struct kgem_bo *bo[1024]; + int last; + int size; + int dirty; + } solid_cache; + + struct { + struct sna_gradient_cache { + struct kgem_bo *bo; + int nstops; + PictGradientStop *stops; + } cache[GRADIENT_CACHE_SIZE]; + int size; + } gradient_cache; + + struct sna_glyph_cache{ + PicturePtr picture; + struct sna_glyph **glyphs; + uint16_t count; + uint16_t evict; + } glyph[2]; + + uint16_t vertex_start; + uint16_t vertex_index; + uint16_t vertex_used; + uint16_t vertex_reloc[8]; + + float vertex_data[16*1024]; + const struct sna_composite_op *op; +}; + +struct gen2_render_state { + Bool need_invariant; + uint16_t vertex_offset; +}; + +struct gen3_render_state { + uint32_t current_dst; + Bool need_invariant; + uint32_t tex_count; + uint32_t last_drawrect_limit; + uint32_t last_target; + uint32_t last_blend; + uint32_t last_constants; + uint32_t last_sampler; + uint32_t last_shader; + uint32_t last_diffuse; + uint32_t last_specular; + + uint16_t vertex_offset; + uint16_t last_vertex_offset; + uint16_t floats_per_vertex; + uint16_t last_floats_per_vertex; + + uint32_t tex_map[4]; + uint32_t tex_handle[2]; + uint32_t tex_delta[2]; +}; + +struct gen4_render_state { + struct kgem_bo *general_bo; + + uint32_t vs; + uint32_t sf[2]; + uint32_t wm; + uint32_t cc; + + int ve_id; + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t vb_id; + uint16_t vertex_offset; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + uint16_t last_pipelined_pointers; + + Bool needs_invariant; +}; + +struct gen5_render_state { + struct kgem_bo *general_bo; + + uint32_t vs; + uint32_t sf[2]; + uint32_t wm; + uint32_t cc; + + int ve_id; + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t vb_id; + uint16_t vertex_offset; + uint16_t last_primitive; + int16_t floats_per_vertex; + 
uint16_t surface_table; + uint16_t last_pipelined_pointers; + + Bool needs_invariant; +}; + +enum { + GEN6_WM_KERNEL_NOMASK = 0, + GEN6_WM_KERNEL_NOMASK_PROJECTIVE, + + GEN6_WM_KERNEL_MASK, + GEN6_WM_KERNEL_MASK_PROJECTIVE, + + GEN6_WM_KERNEL_MASKCA, + GEN6_WM_KERNEL_MASKCA_PROJECTIVE, + + GEN6_WM_KERNEL_MASKCA_SRCALPHA, + GEN6_WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + + GEN6_WM_KERNEL_VIDEO_PLANAR, + GEN6_WM_KERNEL_VIDEO_PACKED, + GEN6_KERNEL_COUNT +}; + +struct gen6_render_state { + struct kgem_bo *general_bo; + + uint32_t vs_state; + uint32_t sf_state; + uint32_t sf_mask_state; + uint32_t wm_state; + uint32_t wm_kernel[GEN6_KERNEL_COUNT]; + + uint32_t cc_vp; + uint32_t cc_blend; + + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t blend; + uint32_t samplers; + uint32_t kernel; + + uint16_t num_sf_outputs; + uint16_t vb_id; + uint16_t ve_id; + uint16_t vertex_offset; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + + Bool needs_invariant; +}; + +struct sna_static_stream { + uint32_t size, used; + uint8_t *data; +}; + +int sna_static_stream_init(struct sna_static_stream *stream); +uint32_t sna_static_stream_add(struct sna_static_stream *stream, + const void *data, uint32_t len, uint32_t align); +void *sna_static_stream_map(struct sna_static_stream *stream, + uint32_t len, uint32_t align); +uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, + void *ptr); +struct kgem_bo *sna_static_stream_fini(struct sna *sna, + struct sna_static_stream *stream); + +struct kgem_bo * +sna_render_get_solid(struct sna *sna, + uint32_t color); + +void +sna_render_flush_solid(struct sna *sna); + +struct kgem_bo * +sna_render_get_gradient(struct sna *sna, + PictGradient *pattern); + +uint32_t sna_rgba_for_color(uint32_t color, int depth); +Bool sna_picture_is_solid(PicturePtr picture, uint32_t *color); + +void no_render_init(struct sna *sna); + +#ifdef SNA_GEN2 +Bool gen2_render_init(struct sna *sna); +#else +static 
inline Bool gen2_render_init(struct sna *sna) { return FALSE; } +#endif + +#ifdef SNA_GEN3 +Bool gen3_render_init(struct sna *sna); +#else +static inline Bool gen3_render_init(struct sna *sna) { return FALSE; } +#endif + +#ifdef SNA_GEN4 +Bool gen4_render_init(struct sna *sna); +#else +static inline Bool gen4_render_init(struct sna *sna) { return FALSE; } +#endif + +#ifdef SNA_GEN5 +Bool gen5_render_init(struct sna *sna); +#else +static inline Bool gen5_render_init(struct sna *sna) { return FALSE; } +#endif + +#ifdef SNA_GEN6 +Bool gen6_render_init(struct sna *sna); +#else +static inline Bool gen6_render_init(struct sna *sna) { return FALSE; } +#endif + +Bool sna_tiling_composite(struct sna *sna, + uint32_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t mask_x, int16_t mask_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp); + +Bool sna_blt_composite(struct sna *sna, + uint32_t op, + PicturePtr src, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp); + +bool sna_blt_fill(struct sna *sna, uint8_t alu, + struct kgem_bo *bo, + int bpp, + uint32_t pixel, + struct sna_fill_op *fill); + +bool sna_blt_copy(struct sna *sna, uint8_t alu, + struct kgem_bo *src, + struct kgem_bo *dst, + int bpp, + struct sna_copy_op *copy); + +Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *bo, + int bpp, + uint32_t pixel, + const BoxRec *box, int n); + +Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + int bpp, + const BoxRec *box, int n); + +Bool sna_get_pixel_from_rgba(uint32_t *pixel, + uint16_t red, + uint16_t green, + uint16_t blue, + uint16_t alpha, + uint32_t format); + +int +sna_render_pixmap_bo(struct sna *sna, + struct sna_composite_channel 
*channel, + PixmapPtr pixmap, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_extract(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_fixup(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_convert(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + PixmapPtr pixmap, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +Bool +sna_render_composite_redirect(struct sna *sna, + struct sna_composite_op *op, + int x, int y, int width, int height); + +void +sna_render_composite_redirect_done(struct sna *sna, + const struct sna_composite_op *op); + +#endif /* SNA_RENDER_H */ diff --git a/src/sna/sna_render_inline.h b/src/sna/sna_render_inline.h new file mode 100644 index 00000000..33d84d4c --- /dev/null +++ b/src/sna/sna_render_inline.h @@ -0,0 +1,102 @@ +#ifndef SNA_RENDER_INLINE_H +#define SNA_RENDER_INLINE_H + +static inline bool need_tiling(struct sna *sna, int16_t width, int16_t height) +{ + /* Is the damage area too large to fit in 3D pipeline, + * and so do we need to split the operation up into tiles? + */ + return (width > sna->render.max_3d_size || + height > sna->render.max_3d_size); +} + +static inline bool need_redirect(struct sna *sna, PixmapPtr dst) +{ + /* Is the pixmap too large to render to? 
*/ + return (dst->drawable.width > sna->render.max_3d_size || + dst->drawable.height > sna->render.max_3d_size); +} + +static inline int vertex_space(struct sna *sna) +{ + return ARRAY_SIZE(sna->render.vertex_data) - sna->render.vertex_used; +} +static inline void vertex_emit(struct sna *sna, float v) +{ + sna->render.vertex_data[sna->render.vertex_used++] = v; +} +static inline void vertex_emit_2s(struct sna *sna, int16_t x, int16_t y) +{ + int16_t *v = (int16_t *)&sna->render.vertex_data[sna->render.vertex_used++]; + v[0] = x; + v[1] = y; +} + +static inline float pack_2s(int16_t x, int16_t y) +{ + union { + struct sna_coordinate p; + float f; + } u; + u.p.x = x; + u.p.y = y; + return u.f; +} + +static inline int batch_space(struct sna *sna) +{ + return KGEM_BATCH_SIZE(&sna->kgem) - sna->kgem.nbatch; +} + +static inline void batch_emit(struct sna *sna, uint32_t dword) +{ + sna->kgem.batch[sna->kgem.nbatch++] = dword; +} + +static inline void batch_emit_float(struct sna *sna, float f) +{ + union { + uint32_t dw; + float f; + } u; + u.f = f; + batch_emit(sna, u.dw); +} + +static inline Bool +is_gpu(DrawablePtr drawable) +{ + struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable); + return priv && priv->gpu_bo; +} + +static inline Bool +is_cpu(DrawablePtr drawable) +{ + struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable); + return !priv || priv->gpu_bo == NULL; +} + +static inline Bool +is_dirty_gpu(struct sna *sna, DrawablePtr drawable) +{ + struct sna_pixmap *priv = sna_pixmap_from_drawable(drawable); + return priv && priv->gpu_bo && priv->gpu_damage; +} + +static inline Bool +too_small(struct sna *sna, DrawablePtr drawable) +{ + return (drawable->width * drawable->height <= 256) && + !is_dirty_gpu(sna, drawable); +} + +static inline Bool +picture_is_gpu(PicturePtr picture) +{ + if (!picture || !picture->pDrawable) + return FALSE; + return is_gpu(picture->pDrawable); +} + +#endif /* SNA_RENDER_INLINE_H */ diff --git a/src/sna/sna_stream.c 
b/src/sna/sna_stream.c new file mode 100644 index 00000000..d6d817d3 --- /dev/null +++ b/src/sna/sna_stream.c @@ -0,0 +1,99 @@ +/* + * Copyright © 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#include "sna.h" +#include "sna_render.h" + +#if DEBUG_STREAM +#undef DBG +#define DBG(x) ErrorF x +#endif + +int sna_static_stream_init(struct sna_static_stream *stream) +{ + stream->used = 0; + stream->size = 64*1024; + + stream->data = malloc(stream->size); + return stream->data != NULL; +} + +static uint32_t sna_static_stream_alloc(struct sna_static_stream *stream, + uint32_t len, uint32_t align) +{ + uint32_t offset = ALIGN(stream->used, align); + uint32_t size = offset + len; + + if (size > stream->size) { + do + stream->size *= 2; + while (stream->size < size); + + stream->data = realloc(stream->data, stream->size); + } + + stream->used = size; + return offset; +} + +uint32_t sna_static_stream_add(struct sna_static_stream *stream, + const void *data, uint32_t len, uint32_t align) +{ + uint32_t offset = sna_static_stream_alloc(stream, len, align); + memcpy(stream->data + offset, data, len); + return offset; +} + +void *sna_static_stream_map(struct sna_static_stream *stream, + uint32_t len, uint32_t align) +{ + uint32_t offset = sna_static_stream_alloc(stream, len, align); + return memset(stream->data + offset, 0, len); +} + +uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, void *ptr) +{ + return (uint8_t *)ptr - stream->data; +} + +struct kgem_bo *sna_static_stream_fini(struct sna *sna, + struct sna_static_stream *stream) +{ + struct kgem_bo *bo; + + DBG(("uploaded %d bytes of static state\n", stream->used)); + + bo = kgem_create_linear(&sna->kgem, stream->used); + if (bo && !kgem_bo_write(&sna->kgem, bo, stream->data, stream->used)) { + kgem_bo_destroy(&sna->kgem, bo); + return NULL; + } + + free(stream->data); + + return bo; +} diff --git a/src/sna/sna_tiling.c b/src/sna/sna_tiling.c new file mode 100644 index 00000000..f69c3ef2 --- /dev/null +++ b/src/sna/sna_tiling.c @@ -0,0 +1,264 @@ +/* + * Copyright © 2010 Intel Corporation + * + * Permission is hereby 
granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_render.h" + +#include <fbpict.h> + +#if DEBUG_RENDER +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +struct sna_tile_state { + int op; + PicturePtr src, mask, dst; + PixmapPtr dst_pixmap; + uint32_t dst_format; + int16_t src_x, src_y; + int16_t mask_x, mask_y; + int16_t dst_x, dst_y; + int16_t width, height; + + int rect_count; + int rect_size; + struct sna_composite_rectangles rects_embedded[16], *rects; +}; + +static void +sna_tiling_composite_add_rect(struct sna_tile_state *tile, + const struct sna_composite_rectangles *r) +{ + if (tile->rect_count == tile->rect_size) { + struct sna_composite_rectangles *a; + int newsize = tile->rect_size * 2; + + if (tile->rects == tile->rects_embedded) { + a = malloc (sizeof(struct sna_composite_rectangles) * newsize); + if (a == NULL) + return; + + memcpy(a, + tile->rects_embedded, + sizeof(struct sna_composite_rectangles) * tile->rect_count); + } else { + a = realloc(tile->rects, + sizeof(struct sna_composite_rectangles) * newsize); + if (a == NULL) + return; + } + + tile->rects = a; + tile->rect_size = newsize; + } + + tile->rects[tile->rect_count++] = *r; +} + +fastcall static void +sna_tiling_composite_blt(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + sna_tiling_composite_add_rect(op->u.priv, r); +} + +static void +sna_tiling_composite_boxes(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + while (nbox--) { + struct sna_composite_rectangles r; + + r.dst.x = box->x1; + r.dst.y = box->y1; + r.mask = r.src = r.dst; + + r.width = box->x2 - box->x1; + r.height = box->y2 - box->y1; + + sna_tiling_composite_add_rect(op->u.priv, &r); + box++; + } +} + +static void +sna_tiling_composite_done(struct sna *sna, + const struct sna_composite_op *op) +{ + struct 
sna_tile_state *tile = op->u.priv; + struct sna_composite_op tmp; + int x, y, n, step = sna->render.max_3d_size; + + DBG(("%s -- %dx%d, count=%d\n", __FUNCTION__, + tile->width, tile->height, tile->rect_count)); + + if (tile->rect_count == 0) + goto done; + + for (y = 0; y < tile->height; y += step) { + int height = step; + if (y + height > tile->height) + height = tile->height - y; + for (x = 0; x < tile->width; x += step) { + int width = step; + if (x + width > tile->width) + width = tile->width - x; + memset(&tmp, 0, sizeof(tmp)); + if (sna->render.composite(sna, tile->op, + tile->src, tile->mask, tile->dst, + tile->src_x + x, tile->src_y + y, + tile->mask_x + x, tile->mask_y + y, + tile->dst_x + x, tile->dst_y + y, + width, height, + &tmp)) { + for (n = 0; n < tile->rect_count; n++) { + const struct sna_composite_rectangles *r = &tile->rects[n]; + int x1, x2, dx, y1, y2, dy; + + x1 = r->dst.x - tile->dst_x, dx = 0; + if (x1 < x) + dx = x - x1, x1 = x; + y1 = r->dst.y - tile->dst_y, dy = 0; + if (y1 < y) + dy = y - y1, y1 = y; + + x2 = r->dst.x + r->width - tile->dst_x; + if (x2 > x + width) + x2 = x + width; + y2 = r->dst.y + r->height - tile->dst_y; + if (y2 > y + height) + y2 = y + height; + + if (y2 > y1 && x2 > x1) { + struct sna_composite_rectangles rr; + rr.src.x = dx + r->src.x; + rr.src.y = dy + r->src.y; + + rr.mask.x = dx + r->mask.x; + rr.mask.y = dy + r->mask.y; + + rr.dst.x = dx + r->dst.x; + rr.dst.y = dy + r->dst.y; + + rr.width = x2 - x1; + rr.height = y2 - y1; + + tmp.blt(sna, &tmp, &rr); + } + } + tmp.done(sna, &tmp); + } else { + DBG(("%s -- falback\n", __FUNCTION__)); + + sna_drawable_move_to_cpu(tile->dst->pDrawable, true); + if (tile->src->pDrawable) + sna_drawable_move_to_cpu(tile->src->pDrawable, false); + if (tile->mask && tile->mask->pDrawable) + sna_drawable_move_to_cpu(tile->mask->pDrawable, false); + + fbComposite(tile->op, + tile->src, tile->mask, tile->dst, + tile->src_x + x, tile->src_y + y, + tile->mask_x + x, tile->mask_y + y, 
+ tile->dst_x + x, tile->dst_y + y, + width, height); + } + } + } + +done: + if (tile->rects != tile->rects_embedded) + free(tile->rects); + free(tile); +} + +static inline int split(int x, int y) +{ + int n = x / y + 1; + return (x + n - 1) / n; +} + +Bool +sna_tiling_composite(struct sna *sna, + uint32_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t mask_x, int16_t mask_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp) +{ + struct sna_tile_state *tile; + struct sna_pixmap *priv; + + DBG(("%s size=(%d, %d), tile=%d\n", + __FUNCTION__, width, height, sna->render.max_3d_size)); + + priv = sna_pixmap(get_drawable_pixmap(dst->pDrawable)); + if (priv == NULL || priv->gpu_bo == NULL) + return FALSE; + + tile = malloc(sizeof(*tile)); + if (!tile) + return FALSE; + + tile->op = op; + + tile->src = src; + tile->mask = mask; + tile->dst = dst; + + tile->src_x = src_x; + tile->src_y = src_y; + tile->mask_x = mask_x; + tile->mask_y = mask_y; + tile->dst_x = dst_x; + tile->dst_y = dst_y; + tile->width = width; + tile->height = height; + tile->rects = tile->rects_embedded; + tile->rect_count = 0; + tile->rect_size = ARRAY_SIZE(tile->rects_embedded); + + tmp->blt = sna_tiling_composite_blt; + tmp->boxes = sna_tiling_composite_boxes; + tmp->done = sna_tiling_composite_done; + + tmp->u.priv = tile; + return TRUE; +} diff --git a/src/sna/sna_transform.c b/src/sna/sna_transform.c new file mode 100644 index 00000000..3cd9b07a --- /dev/null +++ b/src/sna/sna_transform.c @@ -0,0 +1,139 @@ +/* + * Copyright 1998-1999 Precision Insight, Inc., Cedar Park, Texas. All Rights Reserved. 
+ * Copyright (c) 2005 Jesse Barnes <jbarnes@virtuousgeek.org> + * Copyright © 2010 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Jesse Barns <jbarnes@virtuousgeek.org> + * Chris Wilson <chris@chris-wilson.co.uk> + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" +#include "sna.h" + +/** + * Returns whether the provided transform is affine. + * + * transform may be null. 
+ */ +Bool sna_transform_is_affine(const PictTransform *t) +{ + if (t == NULL) + return TRUE; + + return t->matrix[2][0] == 0 && t->matrix[2][1] == 0; +} + +Bool +sna_transform_is_translation(const PictTransform *t, + pixman_fixed_t *tx, + pixman_fixed_t *ty) +{ + if (t == NULL) { + *tx = *ty = 0; + return TRUE; + } + + if (t->matrix[0][0] != IntToxFixed(1) || + t->matrix[0][1] != 0 || + t->matrix[1][0] != 0 || + t->matrix[1][1] != IntToxFixed(1) || + t->matrix[2][0] != 0 || + t->matrix[2][1] != 0 || + t->matrix[2][2] != IntToxFixed(1)) + return FALSE; + + *tx = t->matrix[0][2]; + *ty = t->matrix[1][2]; + return TRUE; +} + +Bool +sna_transform_is_integer_translation(const PictTransform *t, int16_t *tx, int16_t *ty) +{ + if (t == NULL) { + *tx = *ty = 0; + return TRUE; + } + + if (t->matrix[0][0] != IntToxFixed(1) || + t->matrix[0][1] != 0 || + t->matrix[1][0] != 0 || + t->matrix[1][1] != IntToxFixed(1) || + t->matrix[2][0] != 0 || + t->matrix[2][1] != 0 || + t->matrix[2][2] != IntToxFixed(1)) + return FALSE; + + if (pixman_fixed_fraction(t->matrix[0][2]) || + pixman_fixed_fraction(t->matrix[1][2])) + return FALSE; + + *tx = pixman_fixed_to_int(t->matrix[0][2]); + *ty = pixman_fixed_to_int(t->matrix[1][2]); + return TRUE; +} + +/** + * Returns the floating-point coordinates transformed by the given transform. + */ +void +sna_get_transformed_coordinates(int x, int y, + const PictTransform *transform, + float *x_out, float *y_out) +{ + if (transform == NULL) { + *x_out = x; + *y_out = y; + } else + _sna_get_transformed_coordinates(x, y, transform, x_out, y_out); +} + +/** + * Returns the un-normalized floating-point coordinates transformed by the given transform. 
+ */ +Bool +sna_get_transformed_coordinates_3d(int x, int y, + const PictTransform *transform, + float *x_out, float *y_out, float *w_out) +{ + if (transform == NULL) { + *x_out = x; + *y_out = y; + *w_out = 1; + } else { + int64_t result[3]; + + if (!_sna_transform_point(transform, x, y, result)) + return FALSE; + + *x_out = result[0] / 65536.; + *y_out = result[1] / 65536.; + *w_out = result[2] / 65536.; + } + + return TRUE; +} diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c new file mode 100644 index 00000000..db9e085b --- /dev/null +++ b/src/sna/sna_trapezoids.c @@ -0,0 +1,2375 @@ +/* + * Copyright (c) 2007 David Turner + * Copyright (c) 2008 M Joonas Pihlaja + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_render.h" +#include "sna_render_inline.h" + +#include <mipict.h> +#include <fbpict.h> + +#if DEBUG_TRAPEZOIDS +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +#define NO_ACCEL 0 + +#define unlikely(x) x + +#define SAMPLES_X 17 +#define SAMPLES_Y 15 + +#define FAST_SAMPLES_X_shift 8 +#define FAST_SAMPLES_Y_shift 4 + +#define FAST_SAMPLES_X (1<<FAST_SAMPLES_X_shift) +#define FAST_SAMPLES_Y (1<<FAST_SAMPLES_Y_shift) + +#if DEBUG_TRAPEZOIDS +static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function) +{ + if (box->x1 < 0 || box->y1 < 0 || + box->x2 > pixmap->drawable.width || + box->y2 > pixmap->drawable.height) + { + ErrorF("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, + pixmap->drawable.width, + pixmap->drawable.height); + assert(0); + } +} +#define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__) +#else +#define assert_pixmap_contains_box(p, b) +#endif + +static void apply_damage(struct sna_composite_op *op, RegionPtr region) +{ + DBG(("%s: damage=%p, region=%d\n", + __FUNCTION__, op->damage, REGION_NUM_RECTS(region))); + + if (op->damage == NULL) + return; + + RegionTranslate(region, op->dst.x, op->dst.y); + + assert_pixmap_contains_box(op->dst.pixmap, RegionExtents(region)); + sna_damage_add(op->damage, region); +} + +static void apply_damage_box(struct sna_composite_op *op, const BoxRec *box) +{ + BoxRec r; + + if (op->damage == NULL) + return; + + r.x1 = box->x1 + op->dst.x; + r.x2 = box->x2 + op->dst.x; + r.y1 = box->y1 + op->dst.y; + r.y2 = box->y2 + op->dst.y; + + assert_pixmap_contains_box(op->dst.pixmap, &r); + sna_damage_add_box(op->damage, &r); +} + +typedef int grid_scaled_x_t; +typedef int grid_scaled_y_t; + +#define 
FAST_SAMPLES_X_TO_INT_FRAC(x, i, f) \ + _GRID_TO_INT_FRAC_shift(x, i, f, FAST_SAMPLES_X_shift) + +#define _GRID_TO_INT_FRAC_shift(t, i, f, b) do { \ + (f) = (t) & ((1 << (b)) - 1); \ + (i) = (t) >> (b); \ +} while (0) + +/* A grid area is a real in [0,1] scaled by 2*SAMPLES_X*SAMPLES_Y. We want + * to be able to represent exactly areas of subpixel trapezoids whose + * vertices are given in grid scaled coordinates. The scale factor + * comes from needing to accurately represent the area 0.5*dx*dy of a + * triangle with base dx and height dy in grid scaled numbers. */ +typedef int grid_area_t; +#define FAST_SAMPLES_XY (2*FAST_SAMPLES_X*FAST_SAMPLES_Y) /* Unit area on the grid. */ + +#define AREA_TO_ALPHA(c) ((c) / (float)FAST_SAMPLES_XY) + +struct quorem { + int32_t quo; + int32_t rem; +}; + +struct _pool_chunk { + size_t size; + size_t capacity; + + struct _pool_chunk *prev_chunk; + /* Actual data starts here. Well aligned for pointers. */ +}; + +/* A memory pool. This is supposed to be embedded on the stack or + * within some other structure. It may optionally be followed by an + * embedded array from which requests are fulfilled until + * malloc needs to be called to allocate a first real chunk. */ +struct pool { + struct _pool_chunk *current; + struct _pool_chunk *first_free; + + /* The default capacity of a chunk. */ + size_t default_capacity; + + /* Header for the sentinel chunk. Directly following the pool + * struct should be some space for embedded elements from which + * the sentinel chunk allocates from. */ + struct _pool_chunk sentinel[1]; +}; + +/* A polygon edge. */ +struct edge { + /* Next in y-bucket or active list. */ + struct edge *next; + + /* Current x coordinate while the edge is on the active + * list. Initialised to the x coordinate of the top of the + * edge. The quotient is in grid_scaled_x_t units and the + * remainder is mod dy in grid_scaled_y_t units.*/ + struct quorem x; + + /* Advance of the current x when moving down a subsample line. 
*/ + struct quorem dxdy; + + /* Advance of the current x when moving down a full pixel + * row. Only initialised when the height of the edge is large + * enough that there's a chance the edge could be stepped by a + * full row's worth of subsample rows at a time. */ + struct quorem dxdy_full; + + /* The clipped y of the top of the edge. */ + grid_scaled_y_t ytop; + + /* y2-y1 after orienting the edge downwards. */ + grid_scaled_y_t dy; + + /* Number of subsample rows remaining to scan convert of this + * edge. */ + grid_scaled_y_t height_left; + + /* Original sign of the edge: +1 for downwards, -1 for upwards + * edges. */ + int dir; + int vertical; +}; + +/* Number of subsample rows per y-bucket. Must be SAMPLES_Y. */ +#define EDGE_Y_BUCKET_HEIGHT FAST_SAMPLES_Y +#define EDGE_Y_BUCKET_INDEX(y, ymin) (((y) - (ymin))/EDGE_Y_BUCKET_HEIGHT) + +/* A collection of sorted and vertically clipped edges of the polygon. + * Edges are moved from the polygon to an active list while scan + * converting. */ +struct polygon { + /* The vertical clip extents. */ + grid_scaled_y_t ymin, ymax; + + /* Array of edges all starting in the same bucket. An edge is put + * into bucket EDGE_BUCKET_INDEX(edge->ytop, polygon->ymin) when + * it is added to the polygon. */ + struct edge **y_buckets; + struct edge *y_buckets_embedded[64]; + + struct edge edges_embedded[32]; + struct edge *edges; + int num_edges; +}; + +/* A cell records the effect on pixel coverage of polygon edges + * passing through a pixel. It contains two accumulators of pixel + * coverage. + * + * Consider the effects of a polygon edge on the coverage of a pixel + * it intersects and that of the following one. The coverage of the + * following pixel is the height of the edge multiplied by the width + * of the pixel, and the coverage of the pixel itself is the area of + * the trapezoid formed by the edge and the right side of the pixel. 
+ * + * +-----------------------+-----------------------+ + * | | | + * | | | + * |_______________________|_______________________| + * | \...................|.......................|\ + * | \..................|.......................| | + * | \.................|.......................| | + * | \....covered.....|.......................| | + * | \....area.......|.......................| } covered height + * | \..............|.......................| | + * |uncovered\.............|.......................| | + * | area \............|.......................| | + * |___________\...........|.......................|/ + * | | | + * | | | + * | | | + * +-----------------------+-----------------------+ + * + * Since the coverage of the following pixel will always be a multiple + * of the width of the pixel, we can store the height of the covered + * area instead. The coverage of the pixel itself is the total + * coverage minus the area of the uncovered area to the left of the + * edge. As it's faster to compute the uncovered area we only store + * that and subtract it from the total coverage later when forming + * spans to blit. + * + * The heights and areas are signed, with left edges of the polygon + * having positive sign and right edges having negative sign. When + * two edges intersect they swap their left/rightness so their + * contribution above and below the intersection point must be + * computed separately. */ +struct cell { + struct cell *next; + int x; + grid_area_t uncovered_area; + grid_scaled_y_t covered_height; +}; + +/* A cell list represents the scan line sparsely as cells ordered by + * ascending x. It is geared towards scanning the cells in order + * using an internal cursor. */ +struct cell_list { + /* Points to the left-most cell in the scan line. */ + struct cell *head; + /* Sentinel node */ + struct cell tail; + + struct cell **cursor; + + /* Cells in the cell list are owned by the cell list and are + * allocated from this pool. 
*/ + struct { + struct pool base[1]; + struct cell embedded[32]; + } cell_pool; +}; + +/* The active list contains edges in the current scan line ordered by + * the x-coordinate of the intercept of the edge and the scan line. */ +struct active_list { + /* Leftmost edge on the current scan line. */ + struct edge *head; + + /* A lower bound on the height of the active edges is used to + * estimate how soon some active edge ends. We can't advance the + * scan conversion by a full pixel row if an edge ends somewhere + * within it. */ + grid_scaled_y_t min_height; +}; + +struct tor { + struct polygon polygon[1]; + struct active_list active[1]; + struct cell_list coverages[1]; + + /* Clip box. */ + grid_scaled_x_t xmin, xmax; + grid_scaled_y_t ymin, ymax; +}; + +/* Compute the floored division a/b. Assumes / and % perform symmetric + * division. */ +inline static struct quorem +floored_divrem(int a, int b) +{ + struct quorem qr; + qr.quo = a/b; + qr.rem = a%b; + if ((a^b)<0 && qr.rem) { + qr.quo -= 1; + qr.rem += b; + } + return qr; +} + +/* Compute the floored division (x*a)/b. Assumes / and % perform symmetric + * division. 
*/ +static struct quorem +floored_muldivrem(int x, int a, int b) +{ + struct quorem qr; + long long xa = (long long)x*a; + qr.quo = xa/b; + qr.rem = xa%b; + if ((xa>=0) != (b>=0) && qr.rem) { + qr.quo -= 1; + qr.rem += b; + } + return qr; +} + +static void +_pool_chunk_init( + struct _pool_chunk *p, + struct _pool_chunk *prev_chunk, + size_t capacity) +{ + p->prev_chunk = prev_chunk; + p->size = 0; + p->capacity = capacity; +} + +static struct _pool_chunk * +_pool_chunk_create(struct _pool_chunk *prev_chunk, size_t size) +{ + struct _pool_chunk *p; + size_t size_with_head = size + sizeof(struct _pool_chunk); + + if (size_with_head < size) + return NULL; + + p = malloc(size_with_head); + if (p) + _pool_chunk_init(p, prev_chunk, size); + + return p; +} + +static void +pool_init(struct pool *pool, + size_t default_capacity, + size_t embedded_capacity) +{ + pool->current = pool->sentinel; + pool->first_free = NULL; + pool->default_capacity = default_capacity; + _pool_chunk_init(pool->sentinel, NULL, embedded_capacity); +} + +static void +pool_fini(struct pool *pool) +{ + struct _pool_chunk *p = pool->current; + do { + while (NULL != p) { + struct _pool_chunk *prev = p->prev_chunk; + if (p != pool->sentinel) + free(p); + p = prev; + } + p = pool->first_free; + pool->first_free = NULL; + } while (NULL != p); + pool_init(pool, 0, 0); +} + +/* Satisfy an allocation by first allocating a new large enough chunk + * and adding it to the head of the pool's chunk list. This function + * is called as a fallback if pool_alloc() couldn't do a quick + * allocation from the current chunk in the pool. */ +static void * +_pool_alloc_from_new_chunk(struct pool *pool, size_t size) +{ + struct _pool_chunk *chunk; + void *obj; + size_t capacity; + + /* If the allocation is smaller than the default chunk size then + * try getting a chunk off the free list. Force alloc of a new + * chunk for large requests. 
*/ + capacity = size; + chunk = NULL; + if (size < pool->default_capacity) { + capacity = pool->default_capacity; + chunk = pool->first_free; + if (chunk) { + pool->first_free = chunk->prev_chunk; + _pool_chunk_init(chunk, pool->current, chunk->capacity); + } + } + + if (NULL == chunk) { + chunk = _pool_chunk_create (pool->current, capacity); + if (unlikely (NULL == chunk)) + return NULL; + } + pool->current = chunk; + + obj = ((unsigned char*)chunk + sizeof(*chunk) + chunk->size); + chunk->size += size; + return obj; +} + +inline static void * +pool_alloc(struct pool *pool, size_t size) +{ + struct _pool_chunk *chunk = pool->current; + + if (size <= chunk->capacity - chunk->size) { + void *obj = ((unsigned char*)chunk + sizeof(*chunk) + chunk->size); + chunk->size += size; + return obj; + } else + return _pool_alloc_from_new_chunk(pool, size); +} + +static void +pool_reset(struct pool *pool) +{ + /* Transfer all used chunks to the chunk free list. */ + struct _pool_chunk *chunk = pool->current; + if (chunk != pool->sentinel) { + while (chunk->prev_chunk != pool->sentinel) + chunk = chunk->prev_chunk; + + chunk->prev_chunk = pool->first_free; + pool->first_free = pool->current; + } + + /* Reset the sentinel as the current chunk. */ + pool->current = pool->sentinel; + pool->sentinel->size = 0; +} + +/* Rewinds the cell list's cursor to the beginning. After rewinding + * we're good to cell_list_find() the cell any x coordinate. */ +inline static void +cell_list_rewind(struct cell_list *cells) +{ + cells->cursor = &cells->head; +} + +/* Rewind the cell list if its cursor has been advanced past x. 
*/ +inline static void +cell_list_maybe_rewind(struct cell_list *cells, int x) +{ + if ((*cells->cursor)->x > x) + cell_list_rewind(cells); +} + +static void +cell_list_init(struct cell_list *cells) +{ + pool_init(cells->cell_pool.base, + 256*sizeof(struct cell), + sizeof(cells->cell_pool.embedded)); + cells->tail.next = NULL; + cells->tail.x = INT_MAX; + cells->head = &cells->tail; + cell_list_rewind(cells); +} + +static void +cell_list_fini(struct cell_list *cells) +{ + pool_fini(cells->cell_pool.base); +} + +inline static void +cell_list_reset(struct cell_list *cells) +{ + cell_list_rewind(cells); + cells->head = &cells->tail; + pool_reset(cells->cell_pool.base); +} + +static struct cell * +cell_list_alloc(struct cell_list *cells, + struct cell *tail, + int x) +{ + struct cell *cell; + + cell = pool_alloc(cells->cell_pool.base, sizeof (struct cell)); + if (unlikely(NULL == cell)) + abort(); + + *cells->cursor = cell; + cell->next = tail; + cell->x = x; + cell->uncovered_area = 0; + cell->covered_height = 0; + return cell; +} + +/* Find a cell at the given x-coordinate. Returns %NULL if a new cell + * needed to be allocated but couldn't be. Cells must be found with + * non-decreasing x-coordinate until the cell list is rewound using + * cell_list_rewind(). Ownership of the returned cell is retained by + * the cell list. */ +inline static struct cell * +cell_list_find(struct cell_list *cells, int x) +{ + struct cell **cursor = cells->cursor; + struct cell *cell; + + do { + cell = *cursor; + if (cell->x >= x) + break; + + cursor = &cell->next; + } while (1); + cells->cursor = cursor; + + if (cell->x == x) + return cell; + + return cell_list_alloc(cells, cell, x); +} + +/* Add an unbounded subpixel span covering subpixels >= x to the + * coverage cells. 
*/ +static void +cell_list_add_unbounded_subspan(struct cell_list *cells, grid_scaled_x_t x) +{ + struct cell *cell; + int ix, fx; + + FAST_SAMPLES_X_TO_INT_FRAC(x, ix, fx); + + DBG(("%s: x=%d (%d+%d)\n", __FUNCTION__, x, ix, fx)); + + cell = cell_list_find(cells, ix); + cell->uncovered_area += 2*fx; + cell->covered_height++; +} + +/* Add a subpixel span covering [x1, x2) to the coverage cells. */ +inline static void +cell_list_add_subspan(struct cell_list *cells, + grid_scaled_x_t x1, + grid_scaled_x_t x2) +{ + struct cell *cell; + int ix1, fx1; + int ix2, fx2; + + FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1); + FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2); + + DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__, + x1, ix1, fx1, x2, ix2, fx2)); + + cell = cell_list_find(cells, ix1); + if (ix1 != ix2) { + cell->uncovered_area += 2*fx1; + ++cell->covered_height; + + cell = cell_list_find(cells, ix2); + cell->uncovered_area -= 2*fx2; + --cell->covered_height; + } else + cell->uncovered_area += 2*(fx1-fx2); +} + +/* Adds the analytical coverage of an edge crossing the current pixel + * row to the coverage cells and advances the edge's x position to the + * following row. + * + * This function is only called when we know that during this pixel row: + * + * 1) The relative order of all edges on the active list doesn't + * change. In particular, no edges intersect within this row to pixel + * precision. + * + * 2) No new edges start in this row. + * + * 3) No existing edges end mid-row. + * + * This function depends on being called with all edges from the + * active list in the order they appear on the list (i.e. with + * non-decreasing x-coordinate.) 
*/ +static void +cell_list_render_edge(struct cell_list *cells, struct edge *edge, int sign) +{ + grid_scaled_y_t y1, y2, dy; + grid_scaled_x_t fx1, fx2, dx; + int ix1, ix2; + struct quorem x1 = edge->x; + struct quorem x2 = x1; + + if (!edge->vertical) { + x2.quo += edge->dxdy_full.quo; + x2.rem += edge->dxdy_full.rem; + if (x2.rem >= 0) { + ++x2.quo; + x2.rem -= edge->dy; + } + + edge->x = x2; + } + + FAST_SAMPLES_X_TO_INT_FRAC(x1.quo, ix1, fx1); + FAST_SAMPLES_X_TO_INT_FRAC(x2.quo, ix2, fx2); + + DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__, + x1.quo, ix1, fx1, x2.quo, ix2, fx2)); + + /* Edge is entirely within a column? */ + if (ix1 == ix2) { + /* We always know that ix1 is >= the cell list cursor in this + * case due to the no-intersections precondition. */ + struct cell *cell = cell_list_find(cells, ix1); + cell->covered_height += sign*FAST_SAMPLES_Y; + cell->uncovered_area += sign*(fx1 + fx2)*FAST_SAMPLES_Y; + return; + } + + /* Orient the edge left-to-right. */ + dx = x2.quo - x1.quo; + if (dx >= 0) { + y1 = 0; + y2 = FAST_SAMPLES_Y; + } else { + int tmp; + tmp = ix1; ix1 = ix2; ix2 = tmp; + tmp = fx1; fx1 = fx2; fx2 = tmp; + dx = -dx; + sign = -sign; + y1 = FAST_SAMPLES_Y; + y2 = 0; + } + dy = y2 - y1; + + /* Add coverage for all pixels [ix1,ix2] on this row crossed + * by the edge. */ + { + struct quorem y = floored_divrem((FAST_SAMPLES_X - fx1)*dy, dx); + struct cell *cell; + + /* When rendering a previous edge on the active list we may + * advance the cell list cursor past the leftmost pixel of the + * current edge even though the two edges don't intersect. + * e.g. consider two edges going down and rightwards: + * + * --\_+---\_+-----+-----+---- + * \_ \_ | | + * | \_ | \_ | | + * | \_| \_| | + * | \_ \_ | + * ----+-----+-\---+-\---+---- + * + * The left edge touches cells past the starting cell of the + * right edge. Fortunately such cases are rare. 
+ * + * The rewinding is never necessary if the current edge stays + * within a single column because we've checked before calling + * this function that the active list order won't change. */ + cell_list_maybe_rewind(cells, ix1); + + cell = cell_list_find(cells, ix1); + cell->uncovered_area += sign*y.quo*(FAST_SAMPLES_X + fx1); + cell->covered_height += sign*y.quo; + y.quo += y1; + + cell = cell_list_find(cells, ++ix1); + if (ix1 < ix2) { + struct quorem dydx_full = floored_divrem(FAST_SAMPLES_X*dy, dx); + do { + grid_scaled_y_t y_skip = dydx_full.quo; + y.rem += dydx_full.rem; + if (y.rem >= dx) { + ++y_skip; + y.rem -= dx; + } + + y.quo += y_skip; + + y_skip *= sign; + cell->uncovered_area += y_skip*FAST_SAMPLES_X; + cell->covered_height += y_skip; + + cell = cell_list_find(cells, ++ix1); + } while (ix1 != ix2); + } + cell->uncovered_area += sign*(y2 - y.quo)*fx2; + cell->covered_height += sign*(y2 - y.quo); + } +} + + +static void +polygon_fini(struct polygon *polygon) +{ + if (polygon->y_buckets != polygon->y_buckets_embedded) + free(polygon->y_buckets); + + if (polygon->edges != polygon->edges_embedded) + free(polygon->edges); +} + +static int +polygon_init(struct polygon *polygon, + int num_edges, + grid_scaled_y_t ymin, + grid_scaled_y_t ymax) +{ + unsigned h = ymax - ymin; + unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax+EDGE_Y_BUCKET_HEIGHT-1, + ymin); + + if (unlikely(h > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT)) + goto bail_no_mem; /* even if you could, you wouldn't want to. 
*/ + + polygon->edges = polygon->edges_embedded; + polygon->y_buckets = polygon->y_buckets_embedded; + + polygon->num_edges = 0; + if (num_edges > ARRAY_SIZE(polygon->edges_embedded)) { + polygon->edges = malloc(sizeof(struct edge)*num_edges); + if (unlikely(NULL == polygon->edges)) + goto bail_no_mem; + } + + if (num_buckets > ARRAY_SIZE(polygon->y_buckets_embedded)) { + polygon->y_buckets = malloc(num_buckets*sizeof(struct edge *)); + if (unlikely(NULL == polygon->y_buckets)) + goto bail_no_mem; + } + memset(polygon->y_buckets, 0, num_buckets * sizeof(struct edge *)); + + polygon->ymin = ymin; + polygon->ymax = ymax; + return 0; + +bail_no_mem: + polygon_fini(polygon); + return -1; +} + +static void +_polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e) +{ + unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin); + struct edge **ptail = &polygon->y_buckets[ix]; + e->next = *ptail; + *ptail = e; +} + +inline static void +polygon_add_edge(struct polygon *polygon, + grid_scaled_x_t x1, + grid_scaled_x_t x2, + grid_scaled_y_t y1, + grid_scaled_y_t y2, + grid_scaled_y_t top, + grid_scaled_y_t bottom, + int dir) +{ + struct edge *e = &polygon->edges[polygon->num_edges++]; + grid_scaled_x_t dx = x2 - x1; + grid_scaled_y_t dy = y2 - y1; + grid_scaled_y_t ytop, ybot; + grid_scaled_y_t ymin = polygon->ymin; + grid_scaled_y_t ymax = polygon->ymax; + + e->dy = dy; + e->dir = dir; + + ytop = top >= ymin ? top : ymin; + ybot = bottom <= ymax ? 
bottom : ymax; + e->ytop = ytop; + e->height_left = ybot - ytop; + + if (dx == 0) { + e->vertical = true; + e->x.quo = x1; + e->x.rem = 0; + e->dxdy.quo = 0; + e->dxdy.rem = 0; + e->dxdy_full.quo = 0; + e->dxdy_full.rem = 0; + } else { + e->vertical = false; + e->dxdy = floored_divrem(dx, dy); + if (ytop == y1) { + e->x.quo = x1; + e->x.rem = 0; + } else { + e->x = floored_muldivrem(ytop - y1, dx, dy); + e->x.quo += x1; + } + + if (e->height_left >= FAST_SAMPLES_Y) { + e->dxdy_full = floored_muldivrem(FAST_SAMPLES_Y, dx, dy); + } else { + e->dxdy_full.quo = 0; + e->dxdy_full.rem = 0; + } + } + + _polygon_insert_edge_into_its_y_bucket(polygon, e); + + e->x.rem -= dy; /* Bias the remainder for faster edge advancement. */ +} + +static void +active_list_reset(struct active_list *active) +{ + active->head = NULL; + active->min_height = 0; +} + +/* + * Merge two sorted edge lists. + * Input: + * - head_a: The head of the first list. + * - head_b: The head of the second list; head_b cannot be NULL. + * Output: + * Returns the head of the merged list. + * + * Implementation notes: + * To make it fast (in particular, to reduce to an insertion sort whenever + * one of the two input lists only has a single element) we iterate through + * a list until its head becomes greater than the head of the other list, + * then we switch their roles. As soon as one of the two lists is empty, we + * just attach the other one to the current list and exit. + * Writes to memory are only needed to "switch" lists (as it also requires + * attaching to the output list the list which we will be iterating next) and + * to attach the last non-empty list. 
+ */ +static struct edge * +merge_sorted_edges(struct edge *head_a, struct edge *head_b) +{ + struct edge *head, **next; + + head = head_a; + next = &head; + + while (1) { + while (head_a != NULL && head_a->x.quo <= head_b->x.quo) { + next = &head_a->next; + head_a = head_a->next; + } + + *next = head_b; + if (head_a == NULL) + return head; + + while (head_b != NULL && head_b->x.quo <= head_a->x.quo) { + next = &head_b->next; + head_b = head_b->next; + } + + *next = head_a; + if (head_b == NULL) + return head; + } +} + +/* + * Sort (part of) a list. + * Input: + * - list: The list to be sorted; list cannot be NULL. + * - limit: Recursion limit. + * Output: + * - head_out: The head of the sorted list containing the first 2^(level+1) elements of the + * input list; if the input list has fewer elements, head_out be a sorted list + * containing all the elements of the input list. + * Returns the head of the list of unprocessed elements (NULL if the sorted list contains + * all the elements of the input list). + * + * Implementation notes: + * Special case single element list, unroll/inline the sorting of the first two elements. + * Some tail recursion is used since we iterate on the bottom-up solution of the problem + * (we start with a small sorted list and keep merging other lists of the same size to it). + */ +static struct edge * +sort_edges(struct edge *list, + unsigned int level, + struct edge **head_out) +{ + struct edge *head_other, *remaining; + unsigned int i; + + head_other = list->next; + + /* Single element list -> return */ + if (head_other == NULL) { + *head_out = list; + return NULL; + } + + /* Unroll the first iteration of the following loop (halves the number of calls to merge_sorted_edges): + * - Initialize remaining to be the list containing the elements after the second in the input list. + * - Initialize *head_out to be the sorted list containing the first two element. 
+ */ + remaining = head_other->next; + if (list->x.quo <= head_other->x.quo) { + *head_out = list; + /* list->next = head_other; */ /* The input list is already like this. */ + head_other->next = NULL; + } else { + *head_out = head_other; + head_other->next = list; + list->next = NULL; + } + + for (i = 0; i < level && remaining; i++) { + /* Extract a sorted list of the same size as *head_out + * (2^(i+1) elements) from the list of remaining elements. */ + remaining = sort_edges(remaining, i, &head_other); + *head_out = merge_sorted_edges(*head_out, head_other); + } + + /* *head_out now contains (at most) 2^(level+1) elements. */ + + return remaining; +} + +/* Test if the edges on the active list can be safely advanced by a + * full row without intersections or any edges ending. */ +inline static bool +active_list_can_step_full_row(struct active_list *active) +{ + const struct edge *e; + int prev_x = INT_MIN; + + /* Recomputes the minimum height of all edges on the active + * list if we have been dropping edges. */ + if (active->min_height <= 0) { + int min_height = INT_MAX; + + e = active->head; + while (NULL != e) { + if (e->height_left < min_height) + min_height = e->height_left; + e = e->next; + } + + active->min_height = min_height; + } + + if (active->min_height < FAST_SAMPLES_Y) + return false; + + /* Check for intersections as no edges end during the next row. */ + e = active->head; + while (NULL != e) { + struct quorem x = e->x; + + if (!e->vertical) { + x.quo += e->dxdy_full.quo; + x.rem += e->dxdy_full.rem; + if (x.rem >= 0) + ++x.quo; + } + + if (x.quo <= prev_x) + return false; + + prev_x = x.quo; + e = e->next; + } + + return true; +} + +/* Merges edges on the given subpixel row from the polygon to the + * active_list. */ +inline static void +merge_edges(struct active_list *active, + grid_scaled_y_t y, + struct edge **ptail) +{ + /* Split off the edges on the current subrow and merge them into + * the active list. 
*/ + int min_height = active->min_height; + struct edge *subrow_edges = NULL; + + do { + struct edge *tail = *ptail; + if (NULL == tail) + break; + + if (y == tail->ytop) { + *ptail = tail->next; + tail->next = subrow_edges; + subrow_edges = tail; + if (tail->height_left < min_height) + min_height = tail->height_left; + } else + ptail = &tail->next; + } while (1); + + if (subrow_edges) { + sort_edges(subrow_edges, UINT_MAX, &subrow_edges); + active->head = merge_sorted_edges(active->head, subrow_edges); + active->min_height = min_height; + } +} + +/* Advance the edges on the active list by one subsample row by + * updating their x positions. Drop edges from the list that end. */ +inline static void +substep_edges(struct active_list *active) +{ + struct edge **cursor = &active->head; + grid_scaled_x_t prev_x = INT_MIN; + struct edge *unsorted = NULL; + + do { + struct edge *edge = *cursor; + if (NULL == edge) + break; + + if (0 != --edge->height_left) { + edge->x.quo += edge->dxdy.quo; + edge->x.rem += edge->dxdy.rem; + if (edge->x.rem >= 0) { + ++edge->x.quo; + edge->x.rem -= edge->dy; + } + + if (edge->x.quo < prev_x) { + *cursor = edge->next; + edge->next = unsorted; + unsorted = edge; + } else { + prev_x = edge->x.quo; + cursor = &edge->next; + } + } else + *cursor = edge->next; + } while (1); + + if (unsorted) { + sort_edges(unsorted, UINT_MAX, &unsorted); + active->head = merge_sorted_edges(active->head, unsorted); + } +} + +inline static void +apply_nonzero_fill_rule_for_subrow(struct active_list *active, + struct cell_list *coverages) +{ + struct edge *edge = active->head; + int winding = 0; + int xstart; + int xend; + + cell_list_rewind (coverages); + + while (NULL != edge) { + xstart = edge->x.quo; + winding = edge->dir; + while (1) { + edge = edge->next; + if (NULL == edge) + return cell_list_add_unbounded_subspan(coverages, xstart); + + winding += edge->dir; + if (0 == winding) { + if (edge->next == NULL || + edge->next->x.quo != edge->x.quo) + break; + 
} + } + + xend = edge->x.quo; + cell_list_add_subspan(coverages, xstart, xend); + + edge = edge->next; + } +} + +static void +apply_nonzero_fill_rule_and_step_edges(struct active_list *active, + struct cell_list *coverages) +{ + struct edge **cursor = &active->head; + struct edge *left_edge; + + left_edge = *cursor; + while (NULL != left_edge) { + struct edge *right_edge; + int winding = left_edge->dir; + + left_edge->height_left -= FAST_SAMPLES_Y; + if (left_edge->height_left) + cursor = &left_edge->next; + else + *cursor = left_edge->next; + + do { + right_edge = *cursor; + if (NULL == right_edge) + return cell_list_render_edge(coverages, + left_edge, + +1); + + right_edge->height_left -= FAST_SAMPLES_Y; + if (right_edge->height_left) + cursor = &right_edge->next; + else + *cursor = right_edge->next; + + winding += right_edge->dir; + if (0 == winding) { + if (right_edge->next == NULL || + right_edge->next->x.quo != right_edge->x.quo) + break; + } + + if (!right_edge->vertical) { + right_edge->x.quo += right_edge->dxdy_full.quo; + right_edge->x.rem += right_edge->dxdy_full.rem; + if (right_edge->x.rem >= 0) { + ++right_edge->x.quo; + right_edge->x.rem -= right_edge->dy; + } + } + } while (1); + + cell_list_render_edge(coverages, left_edge, +1); + cell_list_render_edge(coverages, right_edge, -1); + + left_edge = *cursor; + } +} + +static void +tor_fini(struct tor *converter) +{ + polygon_fini(converter->polygon); + cell_list_fini(converter->coverages); +} + +static int +tor_init(struct tor *converter, const BoxRec *box, int num_edges) +{ + DBG(("%s: (%d, %d),(%d, %d) x (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, + FAST_SAMPLES_X, FAST_SAMPLES_Y)); + + converter->xmin = box->x1; + converter->ymin = box->y1; + converter->xmax = box->x2; + converter->ymax = box->y2; + + cell_list_init(converter->coverages); + active_list_reset(converter->active); + return polygon_init(converter->polygon, + num_edges, + box->y1 * FAST_SAMPLES_Y, + box->y2 * 
FAST_SAMPLES_Y); +} + +static void +tor_add_edge(struct tor *converter, + int dx, int dy, + int top, int bottom, + const xLineFixed *edge, + int dir) +{ + int x1, x2; + int y1, y2; + + y1 = dy + (edge->p1.y >> (16 - FAST_SAMPLES_Y_shift)); + y2 = dy + (edge->p2.y >> (16 - FAST_SAMPLES_Y_shift)); + if (y1 == y2) + return; + + x1 = dx + (edge->p1.x >> (16 - FAST_SAMPLES_X_shift)); + x2 = dx + (edge->p2.x >> (16 - FAST_SAMPLES_X_shift)); + + DBG(("%s: edge=(%d, %d), (%d, %d), top=%d, bottom=%d, dir=%d\n", + __FUNCTION__, x1, y1, x2, y2, top, bottom, dir)); + polygon_add_edge(converter->polygon, + x1, x2, + y1, y2, + top, bottom, + dir); +} + +static bool +active_list_is_vertical(struct active_list *active) +{ + struct edge *e; + + for (e = active->head; e != NULL; e = e->next) + if (!e->vertical) + return false; + + return true; +} + +static void +step_edges(struct active_list *active, int count) +{ + struct edge **cursor = &active->head; + struct edge *edge; + + for (edge = *cursor; edge != NULL; edge = *cursor) { + edge->height_left -= FAST_SAMPLES_Y * count; + if (edge->height_left) + cursor = &edge->next; + else + *cursor = edge->next; + } +} + +static void +tor_blt_span(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage)); + + op->boxes(sna, op, box, 1, AREA_TO_ALPHA(coverage)); + apply_damage_box(&op->base, box); +} + +static void +tor_blt_span_clipped(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + pixman_region16_t region; + float opacity; + + opacity = AREA_TO_ALPHA(coverage); + DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, opacity)); + + pixman_region_init_rects(®ion, box, 1); + RegionIntersect(®ion, ®ion, clip); + if (REGION_NUM_RECTS(®ion)) { + op->boxes(sna, op, + REGION_RECTS(®ion), + REGION_NUM_RECTS(®ion), + opacity); + 
apply_damage(&op->base, ®ion); + } + pixman_region_fini(®ion); +} + +static void +tor_blt_span_mono(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + if (coverage < FAST_SAMPLES_XY/2) + return; + + tor_blt_span(sna, op, clip, box, FAST_SAMPLES_XY); +} + +static void +tor_blt_span_mono_clipped(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + if (coverage < FAST_SAMPLES_XY/2) + return; + + tor_blt_span_clipped(sna, op, clip, box, FAST_SAMPLES_XY); +} + +static void +tor_blt_span_mono_unbounded(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + tor_blt_span(sna, op, clip, box, + coverage < FAST_SAMPLES_XY/2 ? 0 :FAST_SAMPLES_XY); +} + +static void +tor_blt_span_mono_unbounded_clipped(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + tor_blt_span_clipped(sna, op, clip, box, + coverage < FAST_SAMPLES_XY/2 ? 0 :FAST_SAMPLES_XY); +} + +static void +tor_blt(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + void (*span)(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage), + struct cell_list *cells, + int y, int height, + int xmin, int xmax, + int unbounded) +{ + struct cell *cell = cells->head; + BoxRec box; + int cover = 0; + + /* Skip cells to the left of the clip region. */ + while (cell != NULL && cell->x < xmin) { + DBG(("%s: skipping cell (%d, %d, %d)\n", + __FUNCTION__, + cell->x, cell->covered_height, cell->uncovered_area)); + + cover += cell->covered_height; + cell = cell->next; + } + cover *= FAST_SAMPLES_X*2; + + box.y1 = y; + box.y2 = y + height; + box.x1 = xmin; + + /* Form the spans from the coverages and areas. 
*/ + for (; cell != NULL; cell = cell->next) { + int x = cell->x; + + DBG(("%s: cell=(%d, %d, %d), cover=%d, max=%d\n", __FUNCTION__, + cell->x, cell->covered_height, cell->uncovered_area, + cover, xmax)); + + if (x >= xmax) + break; + + box.x2 = x; + if (box.x2 > box.x1 && (unbounded || cover)) + span(sna, op, clip, &box, cover); + box.x1 = box.x2; + + cover += cell->covered_height*FAST_SAMPLES_X*2; + + if (cell->uncovered_area) { + int area = cover - cell->uncovered_area; + box.x2 = x + 1; + if (unbounded || area) + span(sna, op, clip, &box, area); + box.x1 = box.x2; + } + } + + box.x2 = xmax; + if (box.x2 > box.x1 && (unbounded || cover)) + span(sna, op, clip, &box, cover); +} + +static void +tor_blt_empty(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + void (*span)(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage), + int y, int height, + int xmin, int xmax) +{ + BoxRec box; + + box.x1 = xmin; + box.x2 = xmax; + box.y1 = y; + box.y2 = y + height; + + span(sna, op, clip, &box, 0); +} + +static void +tor_render(struct sna *sna, + struct tor *converter, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + void (*span)(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage), + int unbounded) +{ + int ymin = converter->ymin; + int xmin = converter->xmin; + int xmax = converter->xmax; + int i, j, h = converter->ymax - ymin; + struct polygon *polygon = converter->polygon; + struct cell_list *coverages = converter->coverages; + struct active_list *active = converter->active; + + DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded)); + + /* Render each pixel row. */ + for (i = 0; i < h; i = j) { + int do_full_step = 0; + + j = i + 1; + + /* Determine if we can ignore this row or use the full pixel + * stepper. 
*/ + if (!polygon->y_buckets[i]) { + if (!active->head) { + for (; j < h && !polygon->y_buckets[j]; j++) + ; + DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n", + __FUNCTION__, i, j)); + + if (unbounded) + tor_blt_empty(sna, op, clip, span, i+ymin, j-i, xmin, xmax); + continue; + } + + do_full_step = active_list_can_step_full_row(active); + } + + DBG(("%s: do_full_step=%d, new edges=%d\n", + __FUNCTION__, do_full_step, polygon->y_buckets[i] != NULL)); + if (do_full_step) { + /* Step by a full pixel row's worth. */ + apply_nonzero_fill_rule_and_step_edges(active, + coverages); + + if (active_list_is_vertical(active)) { + while (j < h && + polygon->y_buckets[j] == NULL && + active->min_height >= 2*FAST_SAMPLES_Y) + { + active->min_height -= FAST_SAMPLES_Y; + j++; + } + if (j != i + 1) + step_edges(active, j - (i + 1)); + + DBG(("%s: vertical edges, full step (%d, %d)\n", + __FUNCTION__, i, j)); + } + } else { + grid_scaled_y_t y = (i+ymin)*FAST_SAMPLES_Y; + grid_scaled_y_t suby; + + /* Subsample this row. 
*/ + for (suby = 0; suby < FAST_SAMPLES_Y; suby++) { + if (polygon->y_buckets[i]) + merge_edges(active, + y + suby, + &polygon->y_buckets[i]); + + apply_nonzero_fill_rule_for_subrow(active, + coverages); + substep_edges(active); + } + } + + if (coverages->head != &coverages->tail) { + tor_blt(sna, op, clip, span, coverages, + i+ymin, j-i, xmin, xmax, + unbounded); + cell_list_reset(coverages); + } else if (unbounded) + tor_blt_empty(sna, op, clip, span, i+ymin, j-i, xmin, xmax); + + if (!active->head) + active->min_height = INT_MAX; + else + active->min_height -= FAST_SAMPLES_Y; + } +} + +static int operator_is_bounded(uint8_t op) +{ + switch (op) { + case PictOpOver: + case PictOpOutReverse: + case PictOpAdd: + return TRUE; + default: + return FALSE; + } +} + +static void +trapezoids_fallback(CARD8 op, PicturePtr src, PicturePtr dst, + PictFormatPtr maskFormat, INT16 xSrc, INT16 ySrc, + int ntrap, xTrapezoid * traps) +{ + ScreenPtr screen = dst->pDrawable->pScreen; + + if (maskFormat) { + PixmapPtr scratch; + PicturePtr mask; + INT16 dst_x, dst_y; + BoxRec bounds; + int width, height, depth; + pixman_image_t *image; + pixman_format_code_t format; + int error; + + dst_x = pixman_fixed_to_int(traps[0].left.p1.x); + dst_y = pixman_fixed_to_int(traps[0].left.p1.y); + + miTrapezoidBounds(ntrap, traps, &bounds); + if (bounds.y1 >= bounds.y2 || bounds.x1 >= bounds.x2) + return; + + DBG(("%s: bounds (%d, %d), (%d, %d)\n", + __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2)); + + if (!sna_compute_composite_extents(&bounds, + src, NULL, dst, + xSrc, ySrc, + 0, 0, + bounds.x1, bounds.y1, + bounds.x2 - bounds.x1, + bounds.y2 - bounds.y1)) + return; + + DBG(("%s: extents (%d, %d), (%d, %d)\n", + __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2)); + + width = bounds.x2 - bounds.x1; + height = bounds.y2 - bounds.y1; + bounds.x1 -= dst->pDrawable->x; + bounds.y1 -= dst->pDrawable->y; + depth = maskFormat->depth; + format = maskFormat->format | (BitsPerPixel(depth) 
<< 24); + + DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n", + __FUNCTION__, width, height, depth, format)); + scratch = sna_pixmap_create_upload(screen, + width, height, depth); + if (!scratch) + return; + + memset(scratch->devPrivate.ptr, 0, scratch->devKind*height); + image = pixman_image_create_bits(format, width, height, + scratch->devPrivate.ptr, + scratch->devKind); + if (image) { + for (; ntrap; ntrap--, traps++) + pixman_rasterize_trapezoid(image, + (pixman_trapezoid_t *)traps, + -bounds.x1, -bounds.y1); + + pixman_image_unref(image); + } + + mask = CreatePicture(0, &scratch->drawable, + PictureMatchFormat(screen, depth, format), + 0, 0, serverClient, &error); + screen->DestroyPixmap(scratch); + if (!mask) + return; + + CompositePicture(op, src, mask, dst, + xSrc + bounds.x1 - dst_x, + ySrc + bounds.y1 - dst_y, + 0, 0, + bounds.x1, bounds.y1, + width, height); + FreePicture(mask, 0); + } else { + if (dst->polyEdge == PolyEdgeSharp) + maskFormat = PictureMatchFormat(screen, 1, PICT_a1); + else + maskFormat = PictureMatchFormat(screen, 8, PICT_a8); + + for (; ntrap; ntrap--, traps++) + trapezoids_fallback(op, + src, dst, maskFormat, + xSrc, ySrc, 1, traps); + } +} + +static Bool +composite_aligned_boxes(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr maskFormat, + INT16 src_x, INT16 src_y, + int ntrap, xTrapezoid *traps) +{ + BoxRec stack_boxes[64], *boxes, extents; + pixman_region16_t region, clip; + struct sna *sna; + struct sna_composite_op tmp; + Bool ret = true; + int dx, dy, n, num_boxes; + + DBG(("%s\n", __FUNCTION__)); + + boxes = stack_boxes; + if (ntrap > ARRAY_SIZE(stack_boxes)) + boxes = malloc(sizeof(BoxRec)*ntrap); + + dx = dst->pDrawable->x; + dy = dst->pDrawable->y; + + extents.x1 = extents.y1 = 32767; + extents.x2 = extents.y2 = -32767; + num_boxes = 0; + for (n = 0; n < ntrap; n++) { + boxes[num_boxes].x1 = dx + pixman_fixed_to_int(traps[n].left.p1.x + pixman_fixed_1_minus_e/2); + boxes[num_boxes].y1 = dy + 
pixman_fixed_to_int(traps[n].top + pixman_fixed_1_minus_e/2); + boxes[num_boxes].x2 = dx + pixman_fixed_to_int(traps[n].right.p2.x + pixman_fixed_1_minus_e/2); + boxes[num_boxes].y2 = dy + pixman_fixed_to_int(traps[n].bottom + pixman_fixed_1_minus_e/2); + + if (boxes[num_boxes].x1 >= boxes[num_boxes].x2) + continue; + if (boxes[num_boxes].y1 >= boxes[num_boxes].y2) + continue; + + if (boxes[num_boxes].x1 < extents.x1) + extents.x1 = boxes[num_boxes].x1; + if (boxes[num_boxes].x2 > extents.x2) + extents.x2 = boxes[num_boxes].x2; + + if (boxes[num_boxes].y1 < extents.y1) + extents.y1 = boxes[num_boxes].y1; + if (boxes[num_boxes].y2 > extents.y2) + extents.y2 = boxes[num_boxes].y2; + + num_boxes++; + } + + if (num_boxes == 0) + return true; + + DBG(("%s: extents (%d, %d), (%d, %d) offset of (%d, %d)\n", + __FUNCTION__, + extents.x1, extents.y1, + extents.x2, extents.y2, + extents.x1 - boxes[0].x1, + extents.y1 - boxes[0].y1)); + + src_x += extents.x1 - boxes[0].x1; + src_y += extents.y1 - boxes[0].y1; + + if (!sna_compute_composite_region(&clip, + src, NULL, dst, + src_x, src_y, + 0, 0, + extents.x1 - dx, extents.y1 - dy, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) { + DBG(("%s: trapezoids do not intersect drawable clips\n", + __FUNCTION__)) ; + goto done; + } + + memset(&tmp, 0, sizeof(tmp)); + sna = to_sna_from_drawable(dst->pDrawable); + if (!sna->render.composite(sna, op, src, NULL, dst, + src_x, src_y, + 0, 0, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + &tmp)) { + DBG(("%s: composite render op not supported\n", + __FUNCTION__)); + ret = false; + goto done; + } + + if (maskFormat || + (op == PictOpSrc || op == PictOpClear) || + num_boxes == 1) { + pixman_region_init_rects(®ion, boxes, num_boxes); + RegionIntersect(®ion, ®ion, &clip); + if (REGION_NUM_RECTS(®ion)) { + tmp.boxes(sna, &tmp, + REGION_RECTS(®ion), + REGION_NUM_RECTS(®ion)); + apply_damage(&tmp, ®ion); + } + pixman_region_fini(®ion); + } else { + for (n = 
0; n < num_boxes; n++) { + pixman_region_init_rects(®ion, &boxes[n], 1); + RegionIntersect(®ion, ®ion, &clip); + if (REGION_NUM_RECTS(®ion)) { + tmp.boxes(sna, &tmp, + REGION_RECTS(®ion), + REGION_NUM_RECTS(®ion)); + apply_damage(&tmp, ®ion); + } + pixman_region_fini(®ion); + } + } + tmp.done(sna, &tmp); + +done: + REGION_UNINIT(NULL, &clip); + if (boxes != stack_boxes) + free(boxes); + + return ret; +} + +static inline int coverage(int samples, pixman_fixed_t f) +{ + return (samples * pixman_fixed_frac(f) + pixman_fixed_1/2) / pixman_fixed_1; +} + +static void +composite_unaligned_box(struct sna *sna, + struct sna_composite_spans_op *tmp, + const BoxRec *box, + float opacity, + pixman_region16_t *clip) +{ + pixman_region16_t region; + + pixman_region_init_rects(®ion, box, 1); + RegionIntersect(®ion, ®ion, clip); + if (REGION_NUM_RECTS(®ion)) { + tmp->boxes(sna, tmp, + REGION_RECTS(®ion), + REGION_NUM_RECTS(®ion), + opacity); + apply_damage(&tmp->base, ®ion); + } + pixman_region_fini(®ion); +} + +static void +composite_unaligned_trap_row(struct sna *sna, + struct sna_composite_spans_op *tmp, + xTrapezoid *trap, int dx, + int y1, int y2, int covered, + pixman_region16_t *clip) +{ + BoxRec box; + int opacity; + int x1, x2; + + if (covered == 0) + return; + + if (y2 > clip->extents.y2) + y2 = clip->extents.y2; + if (y1 < clip->extents.y1) + y1 = clip->extents.y1; + if (y1 >= y2) + return; + + x1 = dx + pixman_fixed_to_int(trap->left.p1.x); + x2 = dx + pixman_fixed_to_int(trap->right.p1.x); + if (x2 < clip->extents.x1 || x1 > clip->extents.x2) + return; + + box.y1 = y1; + box.y2 = y2; + + if (x1 == x2) { + box.x1 = x1; + box.x2 = x2 + 1; + + opacity = covered; + opacity *= coverage(SAMPLES_X, trap->right.p1.x) - coverage(SAMPLES_X, trap->left.p1.x); + + if (opacity) + composite_unaligned_box(sna, tmp, &box, + opacity/255., clip); + } else { + if (pixman_fixed_frac(trap->left.p1.x)) { + box.x1 = x1; + box.x2 = x1++; + + opacity = covered; + opacity *= SAMPLES_X - 
coverage(SAMPLES_X, trap->left.p1.x); + + if (opacity) + composite_unaligned_box(sna, tmp, &box, + opacity/255., clip); + } + + if (x2 > x1) { + box.x1 = x1; + box.x2 = x2; + + composite_unaligned_box(sna, tmp, &box, + covered*SAMPLES_X/255., clip); + } + + if (pixman_fixed_frac(trap->right.p1.x)) { + box.x1 = x2; + box.x2 = x2 + 1; + + opacity = covered; + opacity *= coverage(SAMPLES_X, trap->right.p1.x); + + if (opacity) + composite_unaligned_box(sna, tmp, &box, + opacity/255., clip); + } + } +} + +static void +composite_unaligned_trap(struct sna *sna, + struct sna_composite_spans_op *tmp, + xTrapezoid *trap, + int dx, int dy, + pixman_region16_t *clip) +{ + int y1, y2; + + y1 = dy + pixman_fixed_to_int(trap->top); + y2 = dy + pixman_fixed_to_int(trap->bottom); + + if (y1 == y2) { + composite_unaligned_trap_row(sna, tmp, trap, dx, + y1, y1 + 1, + coverage(SAMPLES_Y, trap->bottom) - coverage(SAMPLES_Y, trap->top), + clip); + } else { + if (pixman_fixed_frac(trap->top)) { + composite_unaligned_trap_row(sna, tmp, trap, dx, + y1, y1 + 1, + SAMPLES_Y - coverage(SAMPLES_Y, trap->top), + clip); + y1++; + } + + if (y2 > y1) + composite_unaligned_trap_row(sna, tmp, trap, dx, + y1, y2, + SAMPLES_Y, + clip); + + if (pixman_fixed_frac(trap->bottom)) + composite_unaligned_trap_row(sna, tmp, trap, dx, + y2, y2 + 1, + coverage(SAMPLES_Y, trap->bottom), + clip); + } +} + +inline static void +blt_opacity(PixmapPtr scratch, + int x1, int x2, + int y, int h, + uint8_t opacity) +{ + uint8_t *ptr; + + if (opacity == 0xff) + return; + + if (x1 < 0) + x1 = 0; + if (x2 > scratch->drawable.width) + x2 = scratch->drawable.width; + if (x1 >= x2) + return; + + x2 -= x1; + + ptr = scratch->devPrivate.ptr; + ptr += scratch->devKind * y; + ptr += x1; + do { + if (x2 == 1) + *ptr = opacity; + else + memset(ptr, opacity, x2); + ptr += scratch->devKind; + } while (--h); +} + +static void +blt_unaligned_box_row(PixmapPtr scratch, + BoxPtr extents, + xTrapezoid *trap, + int y1, int y2, + int 
covered) +{ + int x1, x2; + + if (y2 > scratch->drawable.height) + y2 = scratch->drawable.height; + if (y1 < 0) + y1 = 0; + if (y1 >= y2) + return; + + y2 -= y1; + + x1 = pixman_fixed_to_int(trap->left.p1.x); + x2 = pixman_fixed_to_int(trap->right.p1.x); + + x1 -= extents->x1; + x2 -= extents->x1; + + if (x1 == x2) { + blt_opacity(scratch, + x1, x1+1, + y1, y2, + covered * (coverage(SAMPLES_X, trap->right.p1.x) - coverage(SAMPLES_X, trap->left.p1.x))); + } else { + if (pixman_fixed_frac(trap->left.p1.x)) + blt_opacity(scratch, + x1, x1+1, + y1, y2, + covered * (SAMPLES_X - coverage(SAMPLES_X, trap->left.p1.x))); + + if (x2 > x1 + 1) { + blt_opacity(scratch, + x1 + 1, x2, + y1, y2, + covered*SAMPLES_X); + } + + if (pixman_fixed_frac(trap->right.p1.x)) + blt_opacity(scratch, + x2, x2 + 1, + y1, y2, + covered * coverage(SAMPLES_X, trap->right.p1.x)); + } +} + +static Bool +composite_unaligned_boxes_fallback(CARD8 op, + PicturePtr src, + PicturePtr dst, + INT16 src_x, INT16 src_y, + int ntrap, xTrapezoid *traps) +{ + ScreenPtr screen = dst->pDrawable->pScreen; + INT16 dst_x = pixman_fixed_to_int(traps[0].left.p1.x); + INT16 dst_y = pixman_fixed_to_int(traps[0].left.p1.y); + int dx = dst->pDrawable->x; + int dy = dst->pDrawable->y; + int n; + + for (n = 0; n < ntrap; n++) { + xTrapezoid *t = &traps[n]; + PixmapPtr scratch; + PicturePtr mask; + BoxRec extents; + int error; + int y1, y2; + + extents.x1 = pixman_fixed_to_int(t->left.p1.x); + extents.x2 = pixman_fixed_to_int(t->right.p1.x + pixman_fixed_1_minus_e); + extents.y1 = pixman_fixed_to_int(t->top); + extents.y2 = pixman_fixed_to_int(t->bottom + pixman_fixed_1_minus_e); + + if (!sna_compute_composite_extents(&extents, + src, NULL, dst, + src_x, src_y, + 0, 0, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + continue; + + scratch = sna_pixmap_create_upload(screen, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + 8); + if (!scratch) + continue; + + 
memset(scratch->devPrivate.ptr, 0xff, + scratch->devKind * (extents.y2 - extents.y1)); + + extents.x1 -= dx; + extents.x2 -= dx; + extents.y1 -= dy; + extents.y2 -= dy; + + y1 = pixman_fixed_to_int(t->top) - extents.y1; + y2 = pixman_fixed_to_int(t->bottom) - extents.y1; + + if (y1 == y2) { + blt_unaligned_box_row(scratch, &extents, t, y1, y1 + 1, + coverage(SAMPLES_Y, t->bottom) - coverage(SAMPLES_Y, t->top)); + } else { + if (pixman_fixed_frac(t->top)) + blt_unaligned_box_row(scratch, &extents, t, y1, y1 + 1, + SAMPLES_Y - coverage(SAMPLES_Y, t->top)); + + if (y2 > y1 + 1) + blt_unaligned_box_row(scratch, &extents, t, y1+1, y2, + SAMPLES_Y); + + if (pixman_fixed_frac(t->bottom)) + blt_unaligned_box_row(scratch, &extents, t, y2, y2+1, + coverage(SAMPLES_Y, t->bottom)); + } + + mask = CreatePicture(0, &scratch->drawable, + PictureMatchFormat(screen, 8, PICT_a8), + 0, 0, serverClient, &error); + screen->DestroyPixmap(scratch); + if (mask) { + CompositePicture(op, src, mask, dst, + src_x + extents.x1 - dst_x, + src_y + extents.y1 - dst_y, + 0, 0, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1); + FreePicture(mask, 0); + } + } + + return TRUE; +} + +static Bool +composite_unaligned_boxes(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr maskFormat, + INT16 src_x, INT16 src_y, + int ntrap, xTrapezoid *traps) +{ + struct sna *sna; + BoxRec extents; + struct sna_composite_spans_op tmp; + pixman_region16_t clip; + int dst_x, dst_y; + int dx, dy, n; + + DBG(("%s\n", __FUNCTION__)); + + /* XXX need a span converter to handle overlapping traps */ + if (ntrap > 1 && maskFormat) + return false; + + sna = to_sna_from_drawable(dst->pDrawable); + if (!sna->render.composite_spans) + return composite_unaligned_boxes_fallback(op, src, dst, src_x, src_y, ntrap, traps); + + dst_x = extents.x1 = pixman_fixed_to_int(traps[0].left.p1.x); + extents.x2 = pixman_fixed_to_int(traps[0].right.p1.x + pixman_fixed_1_minus_e); + dst_y = extents.y1 = 
pixman_fixed_to_int(traps[0].top); + extents.y2 = pixman_fixed_to_int(traps[0].bottom + pixman_fixed_1_minus_e); + + DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", + __FUNCTION__, src_x, src_y, dst_x, dst_y)); + + for (n = 1; n < ntrap; n++) { + int x1 = pixman_fixed_to_int(traps[n].left.p1.x); + int x2 = pixman_fixed_to_int(traps[n].right.p1.x + pixman_fixed_1_minus_e); + int y1 = pixman_fixed_to_int(traps[n].top); + int y2 = pixman_fixed_to_int(traps[n].bottom + pixman_fixed_1_minus_e); + + if (x1 < extents.x1) + extents.x1 = x1; + if (x2 > extents.x2) + extents.x2 = x2; + if (y1 < extents.y1) + extents.y1 = y1; + if (y2 > extents.y2) + extents.y2 = y2; + } + + DBG(("%s: extents (%d, %d), (%d, %d)\n", __FUNCTION__, + extents.x1, extents.y1, extents.x2, extents.y2)); + + if (!sna_compute_composite_region(&clip, + src, NULL, dst, + src_x + extents.x1 - dst_x, + src_y + extents.y1 - dst_y, + 0, 0, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) { + DBG(("%s: trapezoids do not intersect drawable clips\n", + __FUNCTION__)) ; + return true; + } + + extents = *RegionExtents(&clip); + dx = dst->pDrawable->x; + dy = dst->pDrawable->y; + + DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n", + __FUNCTION__, + extents.x1, extents.y1, + extents.x2, extents.y2, + dx, dy, + src_x + extents.x1 - dst_x - dx, + src_y + extents.y1 - dst_y - dy)); + + memset(&tmp, 0, sizeof(tmp)); + if (!sna->render.composite_spans(sna, op, src, dst, + src_x + extents.x1 - dst_x - dx, + src_y + extents.y1 - dst_y - dy, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + &tmp)) { + DBG(("%s: composite spans render op not supported\n", + __FUNCTION__)); + return false; + } + + for (n = 0; n < ntrap; n++) + composite_unaligned_trap(sna, &tmp, &traps[n], dx, dy, &clip); + tmp.done(sna, &tmp); + + REGION_UNINIT(NULL, &clip); + return true; +} + +static bool +tor_scan_converter(CARD8 op, PicturePtr src, PicturePtr 
dst, + PictFormatPtr maskFormat, INT16 src_x, INT16 src_y, + int ntrap, xTrapezoid *traps) +{ + struct sna *sna; + struct sna_composite_spans_op tmp; + struct tor tor; + void (*span)(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage); + BoxRec extents; + pixman_region16_t clip; + int16_t dst_x, dst_y; + int16_t dx, dy; + int n; + + /* XXX strict adhernce to the Reneder specification */ + if (dst->polyMode == PolyModePrecise) { + DBG(("%s: fallback -- precise rasterisation requested\n", + __FUNCTION__)); + return false; + } + + sna = to_sna_from_drawable(dst->pDrawable); + if (!sna->render.composite_spans) { + DBG(("%s: fallback -- composite spans not supported\n", + __FUNCTION__)); + return false; + } + + dst_x = pixman_fixed_to_int(traps[0].left.p1.x); + dst_y = pixman_fixed_to_int(traps[0].left.p1.y); + + miTrapezoidBounds(ntrap, traps, &extents); + if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2) + return true; + + DBG(("%s: extents (%d, %d), (%d, %d)\n", + __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2)); + + if (!sna_compute_composite_region(&clip, + src, NULL, dst, + src_x + extents.x1 - dst_x, + src_y + extents.y1 - dst_y, + 0, 0, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) { + DBG(("%s: trapezoids do not intersect drawable clips\n", + __FUNCTION__)) ; + return true; + } + + extents = *RegionExtents(&clip); + dx = dst->pDrawable->x; + dy = dst->pDrawable->y; + + DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n", + __FUNCTION__, + extents.x1, extents.y1, + extents.x2, extents.y2, + dx, dy, + src_x + extents.x1 - dst_x - dx, + src_y + extents.y1 - dst_y - dy)); + + memset(&tmp, 0, sizeof(tmp)); + if (!sna->render.composite_spans(sna, op, src, dst, + src_x + extents.x1 - dst_x - dx, + src_y + extents.y1 - dst_y - dy, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + 
&tmp)) { + DBG(("%s: fallback -- composite spans render op not supported\n", + __FUNCTION__)); + return false; + } + + dx *= FAST_SAMPLES_X; + dy *= FAST_SAMPLES_Y; + if (tor_init(&tor, &extents, 2*ntrap)) + goto skip; + + for (n = 0; n < ntrap; n++) { + int top, bottom; + + if (!xTrapezoidValid(&traps[n])) + continue; + + if (pixman_fixed_to_int(traps[n].top) + dst->pDrawable->y >= extents.y2 || + pixman_fixed_to_int(traps[n].bottom) + dst->pDrawable->y < extents.y1) + continue; + + top = dy + (traps[n].top >> (16 - FAST_SAMPLES_Y_shift)); + bottom = dy + (traps[n].bottom >> (16 - FAST_SAMPLES_Y_shift)); + if (top >= bottom) + continue; + + tor_add_edge(&tor, dx, dy, top, bottom, &traps[n].left, 1); + tor_add_edge(&tor, dx, dy, top, bottom, &traps[n].right, -1); + } + + if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp) { + /* XXX An imprecise approximation */ + if (maskFormat && !operator_is_bounded(op)) { + span = tor_blt_span_mono_unbounded; + if (REGION_NUM_RECTS(&clip) > 1) + span = tor_blt_span_mono_unbounded_clipped; + } else { + span = tor_blt_span_mono; + if (REGION_NUM_RECTS(&clip) > 1) + span = tor_blt_span_mono_clipped; + } + } else { + span = tor_blt_span; + if (REGION_NUM_RECTS(&clip) > 1) + span = tor_blt_span_clipped; + } + + tor_render(sna, &tor, &tmp, &clip, span, + maskFormat && !operator_is_bounded(op)); + +skip: + tor_fini(&tor); + tmp.done(sna, &tmp); + + REGION_UNINIT(NULL, &clip); + return true; +} + +void +sna_composite_trapezoids(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr maskFormat, + INT16 xSrc, INT16 ySrc, + int ntrap, xTrapezoid *traps) +{ + struct sna *sna = to_sna_from_drawable(dst->pDrawable); + bool rectilinear = true; + bool pixel_aligned = true; + int n; + + DBG(("%s(op=%d, src=(%d, %d), mask=%08x, ntrap=%d)\n", __FUNCTION__, + op, xSrc, ySrc, + maskFormat ? 
(int)maskFormat->format : 0, + ntrap)); + + if (ntrap == 0) + return; + + if (NO_ACCEL) + goto fallback; + + if (sna->kgem.wedged || !sna->have_render) { + DBG(("%s: fallback -- wedged=%d, have_render=%d\n", + __FUNCTION__, sna->kgem.wedged, sna->have_render)); + goto fallback; + } + + if (dst->alphaMap || src->alphaMap) { + DBG(("%s: fallback -- alpha maps=(dst=%p, src=%p)\n", + __FUNCTION__, dst->alphaMap, src->alphaMap)); + goto fallback; + } + + if (too_small(sna, dst->pDrawable) && !picture_is_gpu(src)) { + DBG(("%s: fallback -- dst is too small, %dx%d\n", + __FUNCTION__, + dst->pDrawable->width, + dst->pDrawable->height)); + goto fallback; + } + + /* scan through for fast rectangles */ + for (n = 0; n < ntrap && rectilinear; n++) { + rectilinear &= + traps[n].left.p1.x == traps[n].left.p2.x && + traps[n].right.p1.x == traps[n].right.p2.x; + pixel_aligned &= + ((traps[n].top | traps[n].bottom | + traps[n].left.p1.x | traps[n].left.p2.x | + traps[n].right.p1.x | traps[n].right.p2.x) + & pixman_fixed_1_minus_e) == 0; + } + + if (rectilinear) { + pixel_aligned |= maskFormat ? + maskFormat->depth == 1 : + dst->polyEdge == PolyEdgeSharp; + if (pixel_aligned) { + if (composite_aligned_boxes(op, src, dst, + maskFormat, + xSrc, ySrc, + ntrap, traps)) + return; + } else { + if (composite_unaligned_boxes(op, src, dst, + maskFormat, + xSrc, ySrc, + ntrap, traps)) + return; + } + } + + if (tor_scan_converter(op, src, dst, maskFormat, + xSrc, ySrc, ntrap, traps)) + return; + +fallback: + DBG(("%s: fallback mask=%08x, ntrap=%d\n", __FUNCTION__, + maskFormat ? (unsigned)maskFormat->format : 0, ntrap)); + trapezoids_fallback(op, src, dst, maskFormat, + xSrc, ySrc, + ntrap, traps); +} diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c new file mode 100644 index 00000000..b6cbda22 --- /dev/null +++ b/src/sna/sna_video.c @@ -0,0 +1,737 @@ +/*************************************************************************** + + Copyright 2000 Intel Corporation. All Rights Reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sub license, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial portions + of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + IN NO EVENT SHALL INTEL, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **************************************************************************/ + +/* + * i830_video.c: i830/i845 Xv driver. 
+ * + * Copyright © 2002 by Alan Hourihane and David Dawes + * + * Authors: + * Alan Hourihane <alanh@tungstengraphics.com> + * David Dawes <dawes@xfree86.org> + * + * Derived from i810 Xv driver: + * + * Authors of i810 code: + * Jonathan Bian <jonathan.bian@intel.com> + * Offscreen Images: + * Matt Sottek <matthew.j.sottek@intel.com> + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include <inttypes.h> +#include <math.h> +#include <string.h> +#include <assert.h> +#include <errno.h> + +#include <sys/mman.h> + +#include "sna.h" +#include "sna_reg.h" +#include "sna_video.h" + +#include <xf86xv.h> +#include <X11/extensions/Xv.h> + +#ifdef SNA_XVMC +#define _SNA_XVMC_SERVER_ +#include "sna_video_hwmc.h" +#else +static inline Bool sna_video_xvmc_setup(struct sna *sna, + ScreenPtr ptr, + XF86VideoAdaptorPtr target) +{ + return FALSE; +} +#endif + +#if DEBUG_VIDEO_TEXTURED +#undef DBG +#define DBG(x) ErrorF x +#endif + +void sna_video_free_buffers(struct sna *sna, struct sna_video *video) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(video->old_buf); i++) { + if (video->old_buf[i]) { + kgem_bo_destroy(&sna->kgem, video->old_buf[i]); + video->old_buf[i] = NULL; + } + } + + if (video->buf) { + kgem_bo_destroy(&sna->kgem, video->buf); + video->buf = NULL; + } +} + +void sna_video_frame_fini(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame) +{ + struct kgem_bo *bo; + + if (!frame->bo->reusable) { + kgem_bo_destroy(&sna->kgem, frame->bo); + return; + } + + bo = video->old_buf[1]; + video->old_buf[1] = video->old_buf[0]; + video->old_buf[0] = video->buf; + video->buf = bo; +} + +Bool +sna_video_clip_helper(ScrnInfoPtr scrn, + struct sna_video *video, + xf86CrtcPtr * crtc_ret, + BoxPtr dst, + short src_x, short src_y, + short drw_x, short drw_y, + short src_w, short src_h, + short drw_w, short drw_h, + int id, + int *top, int* left, int* npixels, int *nlines, + RegionPtr reg, INT32 width, INT32 height) +{ + Bool ret; + RegionRec 
crtc_region_local; + RegionPtr crtc_region = reg; + BoxRec crtc_box; + INT32 x1, x2, y1, y2; + xf86CrtcPtr crtc; + + x1 = src_x; + x2 = src_x + src_w; + y1 = src_y; + y2 = src_y + src_h; + + dst->x1 = drw_x; + dst->x2 = drw_x + drw_w; + dst->y1 = drw_y; + dst->y2 = drw_y + drw_h; + + /* + * For overlay video, compute the relevant CRTC and + * clip video to that + */ + crtc = sna_covering_crtc(scrn, dst, video->desired_crtc, + &crtc_box); + + /* For textured video, we don't actually want to clip at all. */ + if (crtc && !video->textured) { + RegionInit(&crtc_region_local, &crtc_box, 1); + crtc_region = &crtc_region_local; + RegionIntersect(crtc_region, crtc_region, reg); + } + *crtc_ret = crtc; + + ret = xf86XVClipVideoHelper(dst, &x1, &x2, &y1, &y2, + crtc_region, width, height); + if (crtc_region != reg) + RegionUninit(&crtc_region_local); + + *top = y1 >> 16; + *left = (x1 >> 16) & ~1; + *npixels = ALIGN(((x2 + 0xffff) >> 16), 2) - *left; + if (is_planar_fourcc(id)) { + *top &= ~1; + *nlines = ALIGN(((y2 + 0xffff) >> 16), 2) - *top; + } else + *nlines = ((y2 + 0xffff) >> 16) - *top; + + return ret; +} + +void +sna_video_frame_init(struct sna *sna, + struct sna_video *video, + int id, short width, short height, + struct sna_video_frame *frame) +{ + int align; + + frame->id = id; + frame->width = width; + frame->height = height; + + /* Only needs to be DWORD-aligned for textured on i915, but overlay has + * stricter requirements. + */ + if (video->textured) { + align = 4; + } else { + if (sna->kgem.gen >= 40) + /* Actually the alignment is 64 bytes, too. But the + * stride must be at least 512 bytes. Take the easy fix + * and align on 512 bytes unconditionally. */ + align = 512; + else if (IS_I830(sna) || IS_845G(sna)) + /* Harsh, errata on these chipsets limit the stride + * to be a multiple of 256 bytes. 
+ */ + align = 256; + else + align = 64; + } + +#if SNA_XVMC + /* for i915 xvmc, hw requires 1kb aligned surfaces */ + if (id == FOURCC_XVMC && sna->kgem.gen < 40) + align = 1024; +#endif + + + /* Determine the desired destination pitch (representing the chroma's pitch, + * in the planar case. + */ + if (is_planar_fourcc(id)) { + if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) { + frame->pitch[0] = ALIGN((height / 2), align); + frame->pitch[1] = ALIGN(height, align); + frame->size = frame->pitch[0] * width * 3; + } else { + frame->pitch[0] = ALIGN((width / 2), align); + frame->pitch[1] = ALIGN(width, align); + frame->size = frame->pitch[0] * height * 3; + } + } else { + if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) { + frame->pitch[0] = ALIGN((height << 1), align); + frame->size = frame->pitch[0] * width; + } else { + frame->pitch[0] = ALIGN((width << 1), align); + frame->size = frame->pitch[0] * height; + } + frame->pitch[1] = 0; + } + + frame->YBufOffset = 0; + + if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) { + frame->UBufOffset = + frame->YBufOffset + frame->pitch[1] * width; + frame->VBufOffset = + frame->UBufOffset + frame->pitch[0] * width / 2; + } else { + frame->UBufOffset = + frame->YBufOffset + frame->pitch[1] * height; + frame->VBufOffset = + frame->UBufOffset + frame->pitch[0] * height / 2; + } +} + +static struct kgem_bo * +sna_video_buffer(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame) +{ + /* Free the current buffer if we're going to have to reallocate */ + if (video->buf && video->buf->size < frame->size) + sna_video_free_buffers(sna, video); + + if (video->buf == NULL) + video->buf = kgem_create_linear(&sna->kgem, frame->size); + + return video->buf; +} + +static void sna_memcpy_plane(unsigned char *dst, unsigned char *src, + int height, int width, + int dstPitch, int srcPitch, Rotation rotation) +{ + int i, j = 0; + unsigned char *s; + + switch (rotation) { + case RR_Rotate_0: + /* optimise for 
the case of no clipping */ + if (srcPitch == dstPitch && srcPitch == width) + memcpy(dst, src, srcPitch * height); + else + for (i = 0; i < height; i++) { + memcpy(dst, src, width); + src += srcPitch; + dst += dstPitch; + } + break; + case RR_Rotate_90: + for (i = 0; i < height; i++) { + s = src; + for (j = 0; j < width; j++) { + dst[(i) + ((width - j - 1) * dstPitch)] = *s++; + } + src += srcPitch; + } + break; + case RR_Rotate_180: + for (i = 0; i < height; i++) { + s = src; + for (j = 0; j < width; j++) { + dst[(width - j - 1) + + ((height - i - 1) * dstPitch)] = *s++; + } + src += srcPitch; + } + break; + case RR_Rotate_270: + for (i = 0; i < height; i++) { + s = src; + for (j = 0; j < width; j++) { + dst[(height - i - 1) + (j * dstPitch)] = *s++; + } + src += srcPitch; + } + break; + } +} + +static void +sna_copy_planar_data(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + unsigned char *buf, + unsigned char *dst, + int srcPitch, int srcPitch2, + int srcH, int top, int left) +{ + unsigned char *src1, *src2, *src3, *dst1, *dst2, *dst3; + + /* Copy Y data */ + src1 = buf + (top * srcPitch) + left; + + dst1 = dst + frame->YBufOffset; + + sna_memcpy_plane(dst1, src1, + frame->height, frame->width, + frame->pitch[1], srcPitch, + video->rotation); + + /* Copy V data for YV12, or U data for I420 */ + src2 = buf + /* start of YUV data */ + (srcH * srcPitch) + /* move over Luma plane */ + ((top >> 1) * srcPitch2) + /* move down from by top lines */ + (left >> 1); /* move left by left pixels */ + + if (frame->id == FOURCC_I420) + dst2 = dst + frame->UBufOffset; + else + dst2 = dst + frame->VBufOffset; + + sna_memcpy_plane(dst2, src2, + frame->height / 2, frame->width / 2, + frame->pitch[0], srcPitch2, + video->rotation); + + /* Copy U data for YV12, or V data for I420 */ + src3 = buf + /* start of YUV data */ + (srcH * srcPitch) + /* move over Luma plane */ + ((srcH >> 1) * srcPitch2) + /* move over Chroma plane */ + ((top >> 1) * 
srcPitch2) + /* move down from by top lines */ + (left >> 1); /* move left by left pixels */ + if (frame->id == FOURCC_I420) + dst3 = dst + frame->VBufOffset; + else + dst3 = dst + frame->UBufOffset; + + sna_memcpy_plane(dst3, src3, + frame->height / 2, frame->width / 2, + frame->pitch[0], srcPitch2, + video->rotation); +} + +static void +sna_copy_packed_data(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + unsigned char *buf, + unsigned char *dst, + int srcPitch, + int top, int left) +{ + unsigned char *src; + unsigned char *s; + int i, j; + + src = buf + (top * srcPitch) + (left << 1); + + dst += frame->YBufOffset; + + switch (video->rotation) { + case RR_Rotate_0: + frame->width <<= 1; + for (i = 0; i < frame->height; i++) { + memcpy(dst, src, frame->width); + src += srcPitch; + dst += frame->pitch[0]; + } + break; + case RR_Rotate_90: + frame->height <<= 1; + for (i = 0; i < frame->height; i += 2) { + s = src; + for (j = 0; j < frame->width; j++) { + /* Copy Y */ + dst[(i + 0) + ((frame->width - j - 1) * frame->pitch[0])] = *s++; + (void)*s++; + } + src += srcPitch; + } + frame->height >>= 1; + src = buf + (top * srcPitch) + (left << 1); + for (i = 0; i < frame->height; i += 2) { + for (j = 0; j < frame->width; j += 2) { + /* Copy U */ + dst[((i * 2) + 1) + ((frame->width - j - 1) * frame->pitch[0])] = + src[(j * 2) + 1 + (i * srcPitch)]; + dst[((i * 2) + 1) + ((frame->width - j - 2) * frame->pitch[0])] = + src[(j * 2) + 1 + ((i + 1) * srcPitch)]; + /* Copy V */ + dst[((i * 2) + 3) + ((frame->width - j - 1) * frame->pitch[0])] = + src[(j * 2) + 3 + (i * srcPitch)]; + dst[((i * 2) + 3) + ((frame->width - j - 2) * frame->pitch[0])] = + src[(j * 2) + 3 + ((i + 1) * srcPitch)]; + } + } + break; + case RR_Rotate_180: + frame->width <<= 1; + for (i = 0; i < frame->height; i++) { + s = src; + for (j = 0; j < frame->width; j += 4) { + dst[(frame->width - j - 4) + ((frame->height - i - 1) * frame->pitch[0])] = + *s++; + dst[(frame->width - 
j - 3) + ((frame->height - i - 1) * frame->pitch[0])] = + *s++; + dst[(frame->width - j - 2) + ((frame->height - i - 1) * frame->pitch[0])] = + *s++; + dst[(frame->width - j - 1) + ((frame->height - i - 1) * frame->pitch[0])] = + *s++; + } + src += srcPitch; + } + break; + case RR_Rotate_270: + frame->height <<= 1; + for (i = 0; i < frame->height; i += 2) { + s = src; + for (j = 0; j < frame->width; j++) { + /* Copy Y */ + dst[(frame->height - i - 2) + (j * frame->pitch[0])] = *s++; + (void)*s++; + } + src += srcPitch; + } + frame->height >>= 1; + src = buf + (top * srcPitch) + (left << 1); + for (i = 0; i < frame->height; i += 2) { + for (j = 0; j < frame->width; j += 2) { + /* Copy U */ + dst[(((frame->height - i) * 2) - 3) + (j * frame->pitch[0])] = + src[(j * 2) + 1 + (i * srcPitch)]; + dst[(((frame->height - i) * 2) - 3) + + ((j + 1) * frame->pitch[0])] = + src[(j * 2) + 1 + ((i + 1) * srcPitch)]; + /* Copy V */ + dst[(((frame->height - i) * 2) - 1) + (j * frame->pitch[0])] = + src[(j * 2) + 3 + (i * srcPitch)]; + dst[(((frame->height - i) * 2) - 1) + + ((j + 1) * frame->pitch[0])] = + src[(j * 2) + 3 + ((i + 1) * srcPitch)]; + } + } + break; + } +} + +Bool +sna_video_copy_data(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + int top, int left, + int npixels, int nlines, + unsigned char *buf) +{ + unsigned char *dst; + + frame->bo = sna_video_buffer(sna, video, frame); + if (frame->bo == NULL) + return FALSE; + + /* copy data */ + dst = kgem_bo_map(&sna->kgem, frame->bo, PROT_READ | PROT_WRITE); + if (dst == NULL) + return FALSE; + + if (is_planar_fourcc(frame->id)) { + int srcPitch = ALIGN(frame->width, 0x4); + int srcPitch2 = ALIGN((frame->width >> 1), 0x4); + + sna_copy_planar_data(sna, video, frame, + buf, dst, + srcPitch, srcPitch2, + nlines, top, left); + } else { + int srcPitch = frame->width << 1; + + sna_copy_packed_data(sna, video, frame, + buf, dst, + srcPitch, + top, left); + } + + munmap(dst, video->buf->size); + 
return TRUE; +} + +static void sna_crtc_box(xf86CrtcPtr crtc, BoxPtr crtc_box) +{ + if (crtc->enabled) { + crtc_box->x1 = crtc->x; + crtc_box->x2 = + crtc->x + xf86ModeWidth(&crtc->mode, crtc->rotation); + crtc_box->y1 = crtc->y; + crtc_box->y2 = + crtc->y + xf86ModeHeight(&crtc->mode, crtc->rotation); + } else + crtc_box->x1 = crtc_box->x2 = crtc_box->y1 = crtc_box->y2 = 0; +} + +static void sna_box_intersect(BoxPtr dest, BoxPtr a, BoxPtr b) +{ + dest->x1 = a->x1 > b->x1 ? a->x1 : b->x1; + dest->x2 = a->x2 < b->x2 ? a->x2 : b->x2; + dest->y1 = a->y1 > b->y1 ? a->y1 : b->y1; + dest->y2 = a->y2 < b->y2 ? a->y2 : b->y2; + if (dest->x1 >= dest->x2 || dest->y1 >= dest->y2) + dest->x1 = dest->x2 = dest->y1 = dest->y2 = 0; +} + +static int sna_box_area(BoxPtr box) +{ + return (int)(box->x2 - box->x1) * (int)(box->y2 - box->y1); +} + +/* + * Return the crtc covering 'box'. If two crtcs cover a portion of + * 'box', then prefer 'desired'. If 'desired' is NULL, then prefer the crtc + * with greater coverage + */ + +xf86CrtcPtr +sna_covering_crtc(ScrnInfoPtr scrn, + BoxPtr box, xf86CrtcPtr desired, BoxPtr crtc_box_ret) +{ + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + xf86CrtcPtr crtc, best_crtc; + int coverage, best_coverage; + int c; + BoxRec crtc_box, cover_box; + + DBG(("%s for box=(%d, %d), (%d, %d)\n", + __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); + + best_crtc = NULL; + best_coverage = 0; + crtc_box_ret->x1 = 0; + crtc_box_ret->x2 = 0; + crtc_box_ret->y1 = 0; + crtc_box_ret->y2 = 0; + for (c = 0; c < xf86_config->num_crtc; c++) { + crtc = xf86_config->crtc[c]; + + /* If the CRTC is off, treat it as not covering */ + if (!sna_crtc_on(crtc)) { + DBG(("%s: crtc %d off, skipping\n", __FUNCTION__, c)); + continue; + } + + sna_crtc_box(crtc, &crtc_box); + sna_box_intersect(&cover_box, &crtc_box, box); + coverage = sna_box_area(&cover_box); + if (coverage && crtc == desired) { + DBG(("%s: box is on desired crtc [%p]\n", + __FUNCTION__, crtc)); + 
*crtc_box_ret = crtc_box; + return crtc; + } + if (coverage > best_coverage) { + *crtc_box_ret = crtc_box; + best_crtc = crtc; + best_coverage = coverage; + } + } + DBG(("%s: best crtc = %p\n", __FUNCTION__, best_crtc)); + return best_crtc; +} + +bool +sna_wait_for_scanline(struct sna *sna, PixmapPtr pixmap, + xf86CrtcPtr crtc, RegionPtr clip) +{ + pixman_box16_t box, crtc_box; + int pipe, event; + Bool full_height; + int y1, y2; + uint32_t *b; + + /* XXX no wait for scanline support on SNB? */ + if (sna->kgem.gen >= 60) + return false; + + if (!pixmap_is_scanout(pixmap)) + return false; + + if (crtc == NULL) { + if (clip) { + crtc_box = *REGION_EXTENTS(NULL, clip); + } else { + crtc_box.x1 = 0; /* XXX drawable offsets? */ + crtc_box.y1 = 0; + crtc_box.x2 = pixmap->drawable.width; + crtc_box.y2 = pixmap->drawable.height; + } + crtc = sna_covering_crtc(sna->scrn, &crtc_box, NULL, &crtc_box); + } + + if (crtc == NULL) + return false; + + if (clip) { + box = *REGION_EXTENTS(unused, clip); + + if (crtc->transform_in_use) + pixman_f_transform_bounds(&crtc->f_framebuffer_to_crtc, &box); + + /* We could presume the clip was correctly computed... */ + sna_crtc_box(crtc, &crtc_box); + sna_box_intersect(&box, &crtc_box, &box); + + /* + * Make sure we don't wait for a scanline that will + * never occur + */ + y1 = (crtc_box.y1 <= box.y1) ? box.y1 - crtc_box.y1 : 0; + y2 = (box.y2 <= crtc_box.y2) ? 
+ box.y2 - crtc_box.y1 : crtc_box.y2 - crtc_box.y1; + if (y2 <= y1) + return false; + + full_height = FALSE; + if (y1 == 0 && y2 == (crtc_box.y2 - crtc_box.y1)) + full_height = TRUE; + } else { + sna_crtc_box(crtc, &crtc_box); + y1 = crtc_box.y1; + y2 = crtc_box.y2; + full_height = TRUE; + } + + /* + * Pre-965 doesn't have SVBLANK, so we need a bit + * of extra time for the blitter to start up and + * do its job for a full height blit + */ + if (sna_crtc_to_pipe(crtc) == 0) { + pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEA; + event = MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW; + if (full_height) + event = MI_WAIT_FOR_PIPEA_SVBLANK; + } else { + pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEB; + event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW; + if (full_height) + event = MI_WAIT_FOR_PIPEB_SVBLANK; + } + + if (crtc->mode.Flags & V_INTERLACE) { + /* DSL count field lines */ + y1 /= 2; + y2 /= 2; + } + + b = kgem_get_batch(&sna->kgem, 5); + /* The documentation says that the LOAD_SCAN_LINES command + * always comes in pairs. Don't ask me why. */ + b[0] = MI_LOAD_SCAN_LINES_INCL | pipe; + b[1] = (y1 << 16) | (y2-1); + b[2] = MI_LOAD_SCAN_LINES_INCL | pipe; + b[3] = (y1 << 16) | (y2-1); + b[4] = MI_WAIT_FOR_EVENT | event; + kgem_advance_batch(&sna->kgem, 5); + return true; +} + +void sna_video_init(struct sna *sna, ScreenPtr screen) +{ + XF86VideoAdaptorPtr *adaptors, *newAdaptors; + XF86VideoAdaptorPtr textured, overlay; + int num_adaptors; + int prefer_overlay = + xf86ReturnOptValBool(sna->Options, OPTION_PREFER_OVERLAY, FALSE); + + num_adaptors = xf86XVListGenericAdaptors(sna->scrn, &adaptors); + newAdaptors = + malloc((num_adaptors + 2) * sizeof(XF86VideoAdaptorPtr *)); + if (newAdaptors == NULL) + return; + + memcpy(newAdaptors, adaptors, + num_adaptors * sizeof(XF86VideoAdaptorPtr)); + adaptors = newAdaptors; + + /* Set up textured video if we can do it at this depth and we are on + * supported hardware. 
+ */ + textured = sna_video_textured_setup(sna, screen); + overlay = sna_video_overlay_setup(sna, screen); + + if (overlay && prefer_overlay) + adaptors[num_adaptors++] = overlay; + + if (textured) + adaptors[num_adaptors++] = textured; + + if (overlay && !prefer_overlay) + adaptors[num_adaptors++] = overlay; + + if (num_adaptors) + xf86XVScreenInit(screen, adaptors, num_adaptors); + else + xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING, + "Disabling Xv because no adaptors could be initialized.\n"); + if (textured) + sna_video_xvmc_setup(sna, screen, textured); + + free(adaptors); +} diff --git a/src/sna/sna_video.h b/src/sna/sna_video.h new file mode 100644 index 00000000..f66a6977 --- /dev/null +++ b/src/sna/sna_video.h @@ -0,0 +1,130 @@ +/*************************************************************************** + +Copyright 2000 Intel Corporation. All Rights Reserved. + +Permission is hereby granted, free of charge, to any person obtaining a +copy of this software and associated documentation files (the +"Software"), to deal in the Software without restriction, including +without limitation the rights to use, copy, modify, merge, publish, +distribute, sub license, and/or sell copies of the Software, and to +permit persons to whom the Software is furnished to do so, subject to +the following conditions: + +The above copyright notice and this permission notice (including the +next paragraph) shall be included in all copies or substantial portions +of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS +OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF +MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. +IN NO EVENT SHALL INTEL, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, +DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR +OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR +THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ +**************************************************************************/ + +#ifndef SNA_VIDEO_H +#define SNA_VIDEO_H + +#include <xf86.h> +#include <xf86_OSproc.h> +#include <fourcc.h> + +#if defined(XvMCExtension) && defined(ENABLE_XVMC) +#define SNA_XVMC 1 +#endif + +struct sna_video { + int brightness; + int contrast; + int saturation; + xf86CrtcPtr desired_crtc; + + RegionRec clip; + + uint32_t gamma0; + uint32_t gamma1; + uint32_t gamma2; + uint32_t gamma3; + uint32_t gamma4; + uint32_t gamma5; + + int color_key; + + /** YUV data buffers */ + struct kgem_bo *old_buf[2]; + struct kgem_bo *buf; + + Bool textured; + Rotation rotation; + + int SyncToVblank; /* -1: auto, 0: off, 1: on */ +}; + +struct sna_video_frame { + struct kgem_bo *bo; + int id; + int width, height; + int pitch[2]; + int size; + uint32_t YBufOffset; + uint32_t UBufOffset; + uint32_t VBufOffset; +}; + +void sna_video_init(struct sna *sna, ScreenPtr screen); +XF86VideoAdaptorPtr sna_video_overlay_setup(struct sna *sna, + ScreenPtr screen); +XF86VideoAdaptorPtr sna_video_textured_setup(struct sna *sna, + ScreenPtr screen); + +#define FOURCC_XVMC (('C' << 24) + ('M' << 16) + ('V' << 8) + 'X') + +static inline int is_planar_fourcc(int id) +{ + switch (id) { + case FOURCC_YV12: + case FOURCC_I420: + case FOURCC_XVMC: + return 1; + case FOURCC_UYVY: + case FOURCC_YUY2: + default: + return 0; + } +} + +Bool +sna_video_clip_helper(ScrnInfoPtr scrn, + struct sna_video *adaptor_priv, + xf86CrtcPtr * crtc_ret, + BoxPtr dst, + short src_x, short src_y, + short drw_x, short drw_y, + short src_w, short src_h, + short drw_w, short drw_h, + int id, + int *top, int* left, int* npixels, int *nlines, + RegionPtr reg, INT32 width, INT32 height); + +void +sna_video_frame_init(struct sna *sna, + struct sna_video *video, + int id, short width, short height, + struct sna_video_frame *frame); + +Bool +sna_video_copy_data(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + int top, int 
left, + int npixels, int nlines, + unsigned char *buf); + +void sna_video_frame_fini(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame); +void sna_video_free_buffers(struct sna *sna, struct sna_video *video); + +#endif /* SNA_VIDEO_H */ diff --git a/src/sna/sna_video_hwmc.c b/src/sna/sna_video_hwmc.c new file mode 100644 index 00000000..3da7d3a5 --- /dev/null +++ b/src/sna/sna_video_hwmc.c @@ -0,0 +1,252 @@ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Zhenyu Wang <zhenyu.z.wang@sna.com> + * + */ +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#define _SNA_XVMC_SERVER_ +#include "sna.h" +#include "sna_video_hwmc.h" + +#include <X11/extensions/Xv.h> +#include <X11/extensions/XvMC.h> +#include <fourcc.h> + +static int create_subpicture(ScrnInfoPtr scrn, XvMCSubpicturePtr subpicture, + int *num_priv, CARD32 ** priv) +{ + return Success; +} + +static void destroy_subpicture(ScrnInfoPtr scrn, XvMCSubpicturePtr subpicture) +{ +} + +static int create_surface(ScrnInfoPtr scrn, XvMCSurfacePtr surface, + int *num_priv, CARD32 ** priv) +{ + return Success; +} + +static void destroy_surface(ScrnInfoPtr scrn, XvMCSurfacePtr surface) +{ +} + +static int create_context(ScrnInfoPtr scrn, XvMCContextPtr pContext, + int *num_priv, CARD32 **priv) +{ + struct sna *sna = to_sna(scrn); + struct sna_xvmc_hw_context *contextRec; + + *priv = calloc(1, sizeof(struct sna_xvmc_hw_context)); + contextRec = (struct sna_xvmc_hw_context *) *priv; + if (!contextRec) { + *num_priv = 0; + return BadAlloc; + } + + *num_priv = sizeof(struct sna_xvmc_hw_context) >> 2; + + if (sna->kgem.gen >= 40) { + if (sna->kgem.gen >= 45) + contextRec->type = XVMC_I965_MPEG2_VLD; + else + contextRec->type = XVMC_I965_MPEG2_MC; + contextRec->i965.is_g4x = sna->kgem.gen == 45; + contextRec->i965.is_965_q = IS_965_Q(sna); + contextRec->i965.is_igdng = IS_GEN5(sna); + } else { + contextRec->type = XVMC_I915_MPEG2_MC; + contextRec->i915.use_phys_addr = 0; + } + + return Success; +} + +static void destroy_context(ScrnInfoPtr scrn, XvMCContextPtr context) +{ +} + +/* i915 hwmc support */ +static XF86MCSurfaceInfoRec i915_YV12_mpg2_surface = { + FOURCC_YV12, + XVMC_CHROMA_FORMAT_420, + 0, + 720, + 576, + 720, + 576, + XVMC_MPEG_2, + /* XVMC_OVERLAID_SURFACE | XVMC_SUBPICTURE_INDEPENDENT_SCALING, */ + 0, + /* &yv12_subpicture_list */ + NULL, +}; + +static XF86MCSurfaceInfoRec i915_YV12_mpg1_surface = { + FOURCC_YV12, + XVMC_CHROMA_FORMAT_420, + 
0, + 720, + 576, + 720, + 576, + XVMC_MPEG_1, + /* XVMC_OVERLAID_SURFACE | XVMC_SUBPICTURE_INDEPENDENT_SCALING, */ + 0, + NULL, +}; + +static XF86MCSurfaceInfoPtr surface_info_i915[2] = { + (XF86MCSurfaceInfoPtr) & i915_YV12_mpg2_surface, + (XF86MCSurfaceInfoPtr) & i915_YV12_mpg1_surface +}; + +/* i965 and later hwmc support */ +#ifndef XVMC_VLD +#define XVMC_VLD 0x00020000 +#endif + +static XF86MCSurfaceInfoRec yv12_mpeg2_vld_surface = { + FOURCC_YV12, + XVMC_CHROMA_FORMAT_420, + 0, + 1936, + 1096, + 1920, + 1080, + XVMC_MPEG_2 | XVMC_VLD, + XVMC_INTRA_UNSIGNED, + NULL +}; + +static XF86MCSurfaceInfoRec yv12_mpeg2_i965_surface = { + FOURCC_YV12, + XVMC_CHROMA_FORMAT_420, + 0, + 1936, + 1096, + 1920, + 1080, + XVMC_MPEG_2 | XVMC_MOCOMP, + /* XVMC_OVERLAID_SURFACE | XVMC_SUBPICTURE_INDEPENDENT_SCALING, */ + XVMC_INTRA_UNSIGNED, + /* &yv12_subpicture_list */ + NULL +}; + +static XF86MCSurfaceInfoRec yv12_mpeg1_i965_surface = { + FOURCC_YV12, + XVMC_CHROMA_FORMAT_420, + 0, + 1920, + 1080, + 1920, + 1080, + XVMC_MPEG_1 | XVMC_MOCOMP, + /*XVMC_OVERLAID_SURFACE | XVMC_SUBPICTURE_INDEPENDENT_SCALING | + XVMC_INTRA_UNSIGNED, */ + XVMC_INTRA_UNSIGNED, + + /*&yv12_subpicture_list */ + NULL +}; + +static XF86MCSurfaceInfoPtr surface_info_i965[] = { + &yv12_mpeg2_i965_surface, + &yv12_mpeg1_i965_surface +}; + +static XF86MCSurfaceInfoPtr surface_info_vld[] = { + &yv12_mpeg2_vld_surface, + &yv12_mpeg2_i965_surface, +}; + +/* check chip type and load xvmc driver */ +Bool sna_video_xvmc_setup(struct sna *sna, + ScreenPtr screen, + XF86VideoAdaptorPtr target) +{ + XF86MCAdaptorRec *pAdapt; + char *name; + char buf[64]; + + /* Needs KMS support. 
*/ + if (IS_I915G(sna) || IS_I915GM(sna)) + return FALSE; + + if (IS_GEN2(sna)) + return FALSE; + + pAdapt = calloc(1, sizeof(XF86MCAdaptorRec)); + if (!pAdapt) + return FALSE; + + pAdapt->name = target->name; + pAdapt->num_subpictures = 0; + pAdapt->subpictures = NULL; + pAdapt->CreateContext = create_context; + pAdapt->DestroyContext = destroy_context; + pAdapt->CreateSurface = create_surface; + pAdapt->DestroySurface = destroy_surface; + pAdapt->CreateSubpicture = create_subpicture; + pAdapt->DestroySubpicture = destroy_subpicture; + + if (sna->kgem.gen >= 45) { + name = "xvmc_vld", + pAdapt->num_surfaces = ARRAY_SIZE(surface_info_vld); + pAdapt->surfaces = surface_info_vld; + } else if (sna->kgem.gen >= 40) { + name = "i965_xvmc", + pAdapt->num_surfaces = ARRAY_SIZE(surface_info_i965); + pAdapt->surfaces = surface_info_i965; + } else { + name = "i915_xvmc", + pAdapt->num_surfaces = ARRAY_SIZE(surface_info_i915); + pAdapt->surfaces = surface_info_i915; + } + + if (xf86XvMCScreenInit(screen, 1, &pAdapt)) { + xf86DrvMsg(sna->scrn->scrnIndex, X_INFO, + "[XvMC] %s driver initialized.\n", + name); + } else { + xf86DrvMsg(sna->scrn->scrnIndex, X_INFO, + "[XvMC] Failed to initialize XvMC.\n"); + return FALSE; + } + + sprintf(buf, "pci:%04x:%02x:%02x.%d", + sna->PciInfo->domain, + sna->PciInfo->bus, sna->PciInfo->dev, sna->PciInfo->func); + + xf86XvMCRegisterDRInfo(screen, SNA_XVMC_LIBNAME, + buf, + SNA_XVMC_MAJOR, SNA_XVMC_MINOR, + SNA_XVMC_PATCHLEVEL); + return TRUE; +} diff --git a/src/sna/sna_video_hwmc.h b/src/sna/sna_video_hwmc.h new file mode 100644 index 00000000..2494d44b --- /dev/null +++ b/src/sna/sna_video_hwmc.h @@ -0,0 +1,74 @@ +/* + * Copyright © 2007 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, 
distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Zhenyu Wang <zhenyu.z.wang@sna.com> + * + */ +#ifndef SNA_VIDEO_HWMC_H +#define SNA_VIDEO_HWMC_H + +#define SNA_XVMC_LIBNAME "IntelXvMC" +#define SNA_XVMC_MAJOR 0 +#define SNA_XVMC_MINOR 1 +#define SNA_XVMC_PATCHLEVEL 0 + +/* + * Commands that client submits through XvPutImage: + */ + +#define SNA_XVMC_COMMAND_DISPLAY 0x00 +#define SNA_XVMC_COMMAND_UNDISPLAY 0x01 + +/* hw xvmc support type */ +#define XVMC_I915_MPEG2_MC 0x01 +#define XVMC_I965_MPEG2_MC 0x02 +#define XVMC_I945_MPEG2_VLD 0x04 +#define XVMC_I965_MPEG2_VLD 0x08 + +struct sna_xvmc_hw_context { + unsigned int type; + union { + struct { + unsigned int use_phys_addr : 1; + } i915; + struct { + unsigned int is_g4x:1; + unsigned int is_965_q:1; + unsigned int is_igdng:1; + } i965; + }; +}; + +/* Intel private XvMC command to DDX driver */ +struct sna_xvmc_command { + uint32_t handle; +}; + +#ifdef _SNA_XVMC_SERVER_ +#include <xf86xvmc.h> +Bool sna_video_xvmc_setup(struct sna *sna, + ScreenPtr screen, + XF86VideoAdaptorPtr target); +#endif + +#endif diff --git a/src/sna/sna_video_overlay.c b/src/sna/sna_video_overlay.c new file mode 100644 index 00000000..3f7d9557 
--- /dev/null +++ b/src/sna/sna_video_overlay.c @@ -0,0 +1,731 @@ +/*************************************************************************** + + Copyright 2000-2011 Intel Corporation. All Rights Reserved. + + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sub license, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial portions + of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + IN NO EVENT SHALL INTEL, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + THE USE OR OTHER DEALINGS IN THE SOFTWARE. + + **************************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_video.h" + +#include <xf86xv.h> +#include <X11/extensions/Xv.h> +#include <fourcc.h> +#include <i915_drm.h> + +#if DEBUG_VIDEO_OVERLAY +#undef DBG +#define DBG(x) ErrorF x +#endif + +#define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE) + +#define HAS_GAMMA(sna) ((sna)->kgem.gen >= 30) + +static Atom xvBrightness, xvContrast, xvSaturation, xvColorKey, xvPipe; +static Atom xvGamma0, xvGamma1, xvGamma2, xvGamma3, xvGamma4, xvGamma5; +static Atom xvSyncToVblank; + +/* Limits for the overlay/textured video source sizes. 
The documented hardware + * limits are 2048x2048 or better for overlay and both of our textured video + * implementations. Additionally, on the 830 and 845, larger sizes resulted in + * the card hanging, so we keep the limits lower there. + */ +#define IMAGE_MAX_WIDTH 2048 +#define IMAGE_MAX_HEIGHT 2048 +#define IMAGE_MAX_WIDTH_LEGACY 1024 +#define IMAGE_MAX_HEIGHT_LEGACY 1088 + +/* client libraries expect an encoding */ +static const XF86VideoEncodingRec DummyEncoding[1] = { + { + 0, + "XV_IMAGE", + IMAGE_MAX_WIDTH, IMAGE_MAX_HEIGHT, + {1, 1} + } +}; + +#define NUM_FORMATS 3 + +static XF86VideoFormatRec Formats[NUM_FORMATS] = { + {15, TrueColor}, {16, TrueColor}, {24, TrueColor} +}; + +#define NUM_ATTRIBUTES 5 +static XF86AttributeRec Attributes[NUM_ATTRIBUTES] = { + {XvSettable | XvGettable, 0, (1 << 24) - 1, "XV_COLORKEY"}, + {XvSettable | XvGettable, -128, 127, "XV_BRIGHTNESS"}, + {XvSettable | XvGettable, 0, 255, "XV_CONTRAST"}, + {XvSettable | XvGettable, 0, 1023, "XV_SATURATION"}, + {XvSettable | XvGettable, -1, 1, "XV_PIPE"} +}; + +#define GAMMA_ATTRIBUTES 6 +static XF86AttributeRec GammaAttributes[GAMMA_ATTRIBUTES] = { + {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA0"}, + {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA1"}, + {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA2"}, + {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA3"}, + {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA4"}, + {XvSettable | XvGettable, 0, 0xffffff, "XV_GAMMA5"} +}; + +#define NUM_IMAGES 4 + +static XF86ImageRec Images[NUM_IMAGES] = { + XVIMAGE_YUY2, + XVIMAGE_YV12, + XVIMAGE_I420, + XVIMAGE_UYVY, +}; + +/* kernel modesetting overlay functions */ +static Bool sna_has_overlay(struct sna *sna) +{ + struct drm_i915_getparam gp; + int has_overlay = 0; + int ret; + + gp.param = I915_PARAM_HAS_OVERLAY; + gp.value = &has_overlay; + ret = drmCommandWriteRead(sna->kgem.fd, DRM_I915_GETPARAM, &gp, sizeof(gp)); + + return !! 
has_overlay; + (void)ret; +} + +static Bool sna_video_overlay_update_attrs(struct sna *sna, + struct sna_video *video) +{ + struct drm_intel_overlay_attrs attrs; + + DBG(("%s()\n", __FUNCTION__)); + + attrs.flags = I915_OVERLAY_UPDATE_ATTRS; + attrs.brightness = video->brightness; + attrs.contrast = video->contrast; + attrs.saturation = video->saturation; + attrs.color_key = video->color_key; + attrs.gamma0 = video->gamma0; + attrs.gamma1 = video->gamma1; + attrs.gamma2 = video->gamma2; + attrs.gamma3 = video->gamma3; + attrs.gamma4 = video->gamma4; + attrs.gamma5 = video->gamma5; + + return drmCommandWriteRead(sna->kgem.fd, DRM_I915_OVERLAY_ATTRS, + &attrs, sizeof(attrs)) == 0; +} + +static void sna_video_overlay_off(struct sna *sna) +{ + struct drm_intel_overlay_put_image request; + int ret; + + DBG(("%s()\n", __FUNCTION__)); + + request.flags = 0; + + ret = drmCommandWrite(sna->kgem.fd, DRM_I915_OVERLAY_PUT_IMAGE, + &request, sizeof(request)); + (void)ret; +} + +static void sna_video_overlay_stop(ScrnInfoPtr scrn, + pointer data, + Bool shutdown) +{ + struct sna *sna = to_sna(scrn); + struct sna_video *video = data; + + DBG(("%s()\n", __FUNCTION__)); + + REGION_EMPTY(scrn->pScreen, &video->clip); + + if (!shutdown) + return; + + sna_video_overlay_off(sna); + sna_video_free_buffers(sna, video); +} + +static int +sna_video_overlay_set_port_attribute(ScrnInfoPtr scrn, + Atom attribute, INT32 value, pointer data) +{ + struct sna *sna = to_sna(scrn); + struct sna_video *video = data; + + if (attribute == xvBrightness) { + if ((value < -128) || (value > 127)) + return BadValue; + DBG(("%s: BRIGHTNESS %d -> %d\n", __FUNCTION__, + video->contrast, (int)value)); + video->brightness = value; + } else if (attribute == xvContrast) { + if ((value < 0) || (value > 255)) + return BadValue; + DBG(("%s: CONTRAST %d -> %d\n", __FUNCTION__, + video->contrast, (int)value)); + video->contrast = value; + } else if (attribute == xvSaturation) { + if ((value < 0) || (value > 1023)) + 
return BadValue; + DBG(("%s: SATURATION %d -> %d\n", __FUNCTION__, + video->saturation, (int)value)); + video->saturation = value; + } else if (attribute == xvPipe) { + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + if ((value < -1) || (value > xf86_config->num_crtc)) + return BadValue; + if (value < 0) + video->desired_crtc = NULL; + else + video->desired_crtc = xf86_config->crtc[value]; + } else if (attribute == xvGamma0 && HAS_GAMMA(sna)) { + video->gamma0 = value; + } else if (attribute == xvGamma1 && HAS_GAMMA(sna)) { + video->gamma1 = value; + } else if (attribute == xvGamma2 && HAS_GAMMA(sna)) { + video->gamma2 = value; + } else if (attribute == xvGamma3 && HAS_GAMMA(sna)) { + video->gamma3 = value; + } else if (attribute == xvGamma4 && HAS_GAMMA(sna)) { + video->gamma4 = value; + } else if (attribute == xvGamma5 && HAS_GAMMA(sna)) { + video->gamma5 = value; + } else if (attribute == xvColorKey) { + video->color_key = value; + DBG(("COLORKEY\n")); + } else + return BadMatch; + + if ((attribute == xvGamma0 || + attribute == xvGamma1 || + attribute == xvGamma2 || + attribute == xvGamma3 || + attribute == xvGamma4 || + attribute == xvGamma5) && HAS_GAMMA(sna)) { + DBG(("%s: GAMMA\n", __FUNCTION__)); + } + + if (!sna_video_overlay_update_attrs(sna, data)) + return BadValue; + + if (attribute == xvColorKey) + REGION_EMPTY(scrn->pScreen, &video->clip); + + return Success; +} + +static int +sna_video_overlay_get_port_attribute(ScrnInfoPtr scrn, + Atom attribute, INT32 * value, pointer data) +{ + struct sna *sna = to_sna(scrn); + struct sna_video *video = (struct sna_video *) data; + + if (attribute == xvBrightness) { + *value = video->brightness; + } else if (attribute == xvContrast) { + *value = video->contrast; + } else if (attribute == xvSaturation) { + *value = video->saturation; + } else if (attribute == xvPipe) { + int c; + xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn); + for (c = 0; c < xf86_config->num_crtc; c++) + if 
(xf86_config->crtc[c] == video->desired_crtc) + break; + if (c == xf86_config->num_crtc) + c = -1; + *value = c; + } else if (attribute == xvGamma0 && HAS_GAMMA(sna)) { + *value = video->gamma0; + } else if (attribute == xvGamma1 && HAS_GAMMA(sna)) { + *value = video->gamma1; + } else if (attribute == xvGamma2 && HAS_GAMMA(sna)) { + *value = video->gamma2; + } else if (attribute == xvGamma3 && HAS_GAMMA(sna)) { + *value = video->gamma3; + } else if (attribute == xvGamma4 && HAS_GAMMA(sna)) { + *value = video->gamma4; + } else if (attribute == xvGamma5 && HAS_GAMMA(sna)) { + *value = video->gamma5; + } else if (attribute == xvColorKey) { + *value = video->color_key; + } else if (attribute == xvSyncToVblank) { + *value = video->SyncToVblank; + } else + return BadMatch; + + return Success; +} + +static void +sna_video_overlay_query_best_size(ScrnInfoPtr scrn, + Bool motion, + short vid_w, short vid_h, + short drw_w, short drw_h, + unsigned int *p_w, unsigned int *p_h, pointer data) +{ + if (vid_w > (drw_w << 1)) + drw_w = vid_w >> 1; + if (vid_h > (drw_h << 1)) + drw_h = vid_h >> 1; + + *p_w = drw_w; + *p_h = drw_h; +} + +static void +update_dst_box_to_crtc_coords(struct sna *sna, xf86CrtcPtr crtc, BoxPtr dstBox) +{ + ScrnInfoPtr scrn = sna->scrn; + int tmp; + + /* for overlay, we should take it from crtc's screen + * coordinate to current crtc's display mode. + * yeah, a bit confusing. 
+ */ + switch (crtc->rotation & 0xf) { + case RR_Rotate_0: + dstBox->x1 -= crtc->x; + dstBox->x2 -= crtc->x; + dstBox->y1 -= crtc->y; + dstBox->y2 -= crtc->y; + break; + case RR_Rotate_90: + tmp = dstBox->x1; + dstBox->x1 = dstBox->y1 - crtc->x; + dstBox->y1 = scrn->virtualX - tmp - crtc->y; + tmp = dstBox->x2; + dstBox->x2 = dstBox->y2 - crtc->x; + dstBox->y2 = scrn->virtualX - tmp - crtc->y; + tmp = dstBox->y1; + dstBox->y1 = dstBox->y2; + dstBox->y2 = tmp; + break; + case RR_Rotate_180: + tmp = dstBox->x1; + dstBox->x1 = scrn->virtualX - dstBox->x2 - crtc->x; + dstBox->x2 = scrn->virtualX - tmp - crtc->x; + tmp = dstBox->y1; + dstBox->y1 = scrn->virtualY - dstBox->y2 - crtc->y; + dstBox->y2 = scrn->virtualY - tmp - crtc->y; + break; + case RR_Rotate_270: + tmp = dstBox->x1; + dstBox->x1 = scrn->virtualY - dstBox->y1 - crtc->x; + dstBox->y1 = tmp - crtc->y; + tmp = dstBox->x2; + dstBox->x2 = scrn->virtualY - dstBox->y2 - crtc->x; + dstBox->y2 = tmp - crtc->y; + tmp = dstBox->x1; + dstBox->x1 = dstBox->x2; + dstBox->x2 = tmp; + break; + } + + return; +} + +static Bool +sna_video_overlay_show(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + xf86CrtcPtr crtc, + BoxPtr dstBox, + short src_w, short src_h, + short drw_w, short drw_h) +{ + struct drm_intel_overlay_put_image request; + bool planar = is_planar_fourcc(frame->id); + float scale; + + DBG(("%s: src=(%dx%d), dst=(%dx%d)\n", __FUNCTION__, + src_w, src_h, drw_w, drw_h)); + + update_dst_box_to_crtc_coords(sna, crtc, dstBox); + if (crtc->rotation & (RR_Rotate_90 | RR_Rotate_270)) { + int tmp; + + tmp = frame->width; + frame->width = frame->height; + frame->height = tmp; + + tmp = drw_w; + drw_w = drw_h; + drw_h = tmp; + + tmp = src_w; + src_w = src_h; + src_h = tmp; + } + + memset(&request, 0, sizeof(request)); + request.flags = I915_OVERLAY_ENABLE; + + request.bo_handle = frame->bo->handle; + if (planar) { + request.stride_Y = frame->pitch[1]; + request.stride_UV = frame->pitch[0]; + 
} else { + request.stride_Y = frame->pitch[0]; + request.stride_UV = 0; + } + request.offset_Y = frame->YBufOffset; + request.offset_U = frame->UBufOffset; + request.offset_V = frame->VBufOffset; + DBG(("%s: handle=%d, stride_Y=%d, stride_UV=%d, off_Y: %i, off_U: %i, off_V: %i\n", + __FUNCTION__, + request.bo_handle, request.stride_Y, request.stride_UV, + request.offset_Y, request.offset_U, request.offset_V)); + + request.crtc_id = sna_crtc_id(crtc); + request.dst_x = dstBox->x1; + request.dst_y = dstBox->y1; + request.dst_width = dstBox->x2 - dstBox->x1; + request.dst_height = dstBox->y2 - dstBox->y1; + + DBG(("%s: crtc=%d, dst=(%d, %d)x(%d, %d)\n", + __FUNCTION__, request.crtc_id, + request.dst_x, request.dst_y, + request.dst_width, request.dst_height)); + + request.src_width = frame->width; + request.src_height = frame->height; + /* adjust src dimensions */ + if (request.dst_height > 1) { + scale = ((float)request.dst_height - 1) / ((float)drw_h - 1); + request.src_scan_height = src_h * scale; + } else + request.src_scan_height = 1; + + if (request.dst_width > 1) { + scale = ((float)request.dst_width - 1) / ((float)drw_w - 1); + request.src_scan_width = src_w * scale; + } else + request.src_scan_width = 1; + + DBG(("%s: src=(%d, %d) scan=(%d, %d)\n", + __FUNCTION__, + request.src_width, request.src_height, + request.src_scan_width, request.src_scan_height)); + + if (planar) { + request.flags |= I915_OVERLAY_YUV_PLANAR | I915_OVERLAY_YUV420; + } else { + request.flags |= I915_OVERLAY_YUV_PACKED | I915_OVERLAY_YUV422; + if (frame->id == FOURCC_UYVY) + request.flags |= I915_OVERLAY_Y_SWAP; + } + + DBG(("%s: flags=%x\n", __FUNCTION__, request.flags)); + + return drmCommandWrite(sna->kgem.fd, DRM_I915_OVERLAY_PUT_IMAGE, + &request, sizeof(request)) == 0; +} + +static int +sna_video_overlay_put_image(ScrnInfoPtr scrn, + short src_x, short src_y, + short drw_x, short drw_y, + short src_w, short src_h, + short drw_w, short drw_h, + int id, unsigned char *buf, + short 
width, short height, + Bool sync, RegionPtr clip, pointer data, + DrawablePtr drawable) +{ + struct sna *sna = to_sna(scrn); + struct sna_video *video = data; + struct sna_video_frame frame; + BoxRec dstBox; + xf86CrtcPtr crtc; + int top, left, npixels, nlines; + + DBG(("%s: src: (%d,%d)(%d,%d), dst: (%d,%d)(%d,%d), width %d, height %d\n", + __FUNCTION__, + src_x, src_y, src_w, src_h, drw_x, + drw_y, drw_w, drw_h, width, height)); + + /* If dst width and height are less than 1/8th the src size, the + * src/dst scale factor becomes larger than 8 and doesn't fit in + * the scale register. */ + if (src_w >= (drw_w * 8)) + drw_w = src_w / 7; + + if (src_h >= (drw_h * 8)) + drw_h = src_h / 7; + + if (!sna_video_clip_helper(scrn, + video, + &crtc, + &dstBox, + src_x, src_y, drw_x, drw_y, + src_w, src_h, drw_w, drw_h, + id, + &top, &left, &npixels, &nlines, clip, + width, height)) + return Success; + + if (!crtc) { + /* + * If the video isn't visible on any CRTC, turn it off + */ + sna_video_overlay_off(sna); + return Success; + } + + sna_video_frame_init(sna, video, id, width, height, &frame); + + /* overlay can't handle rotation natively, store it for the copy func */ + video->rotation = crtc->rotation; + if (!sna_video_copy_data(sna, video, &frame, + top, left, npixels, nlines, buf)) { + DBG(("%s: failed to copy video data\n", __FUNCTION__)); + return BadAlloc; + } + + if (!sna_video_overlay_show + (sna, video, &frame, crtc, &dstBox, src_w, src_h, drw_w, drw_h)) { + DBG(("%s: failed to show video frame\n", __FUNCTION__)); + return BadAlloc; + } + + sna_video_frame_fini(sna, video, &frame); + + /* update cliplist */ + if (!REGION_EQUAL(scrn->pScreen, &video->clip, clip)) { + REGION_COPY(scrn->pScreen, &video->clip, clip); + xf86XVFillKeyHelperDrawable(drawable, video->color_key, clip); + } + + return Success; +} + +static int +sna_video_overlay_query_video_attributes(ScrnInfoPtr scrn, + int id, + unsigned short *w, unsigned short *h, + int *pitches, int *offsets) +{ + 
struct sna *sna = to_sna(scrn); + int size, tmp; + + DBG(("%s: w is %d, h is %d\n", __FUNCTION__, *w, *h)); + + if (IS_845G(sna) || IS_I830(sna)) { + if (*w > IMAGE_MAX_WIDTH_LEGACY) + *w = IMAGE_MAX_WIDTH_LEGACY; + if (*h > IMAGE_MAX_HEIGHT_LEGACY) + *h = IMAGE_MAX_HEIGHT_LEGACY; + } else { + if (*w > IMAGE_MAX_WIDTH) + *w = IMAGE_MAX_WIDTH; + if (*h > IMAGE_MAX_HEIGHT) + *h = IMAGE_MAX_HEIGHT; + } + + *w = (*w + 1) & ~1; + if (offsets) + offsets[0] = 0; + + switch (id) { + /* IA44 is for XvMC only */ + case FOURCC_IA44: + case FOURCC_AI44: + if (pitches) + pitches[0] = *w; + size = *w * *h; + break; + case FOURCC_YV12: + case FOURCC_I420: + *h = (*h + 1) & ~1; + size = (*w + 3) & ~3; + if (pitches) + pitches[0] = size; + size *= *h; + if (offsets) + offsets[1] = size; + tmp = ((*w >> 1) + 3) & ~3; + if (pitches) + pitches[1] = pitches[2] = tmp; + tmp *= (*h >> 1); + size += tmp; + if (offsets) + offsets[2] = size; + size += tmp; +#if 0 + if (pitches) + ErrorF("pitch 0 is %d, pitch 1 is %d, pitch 2 is %d\n", + pitches[0], pitches[1], pitches[2]); + if (offsets) + ErrorF("offset 1 is %d, offset 2 is %d\n", offsets[1], + offsets[2]); + if (offsets) + ErrorF("size is %d\n", size); +#endif + break; + case FOURCC_UYVY: + case FOURCC_YUY2: + default: + size = *w << 1; + if (pitches) + pitches[0] = size; + size *= *h; + break; + } + + return size; +} + +static int sna_video_overlay_color_key(struct sna *sna) +{ + ScrnInfoPtr scrn = sna->scrn; + int color_key; + + if (xf86GetOptValInteger(sna->Options, OPTION_VIDEO_KEY, + &color_key)) { + } else if (xf86GetOptValInteger(sna->Options, OPTION_COLOR_KEY, + &color_key)) { + } else { + color_key = + (1 << scrn->offset.red) | + (1 << scrn->offset.green) | + (((scrn->mask.blue >> scrn->offset.blue) - 1) << scrn->offset.blue); + } + + return color_key & ((1 << scrn->depth) - 1); +} + +XF86VideoAdaptorPtr sna_video_overlay_setup(struct sna *sna, + ScreenPtr screen) +{ + XF86VideoAdaptorPtr adaptor; + struct sna_video *video; + 
XF86AttributePtr att; + + if (!sna_has_overlay(sna)) { + xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING, + "Overlay video not supported on this hardware\n"); + return NULL; + } + + DBG(("%s()\n", __FUNCTION__)); + + if (!(adaptor = calloc(1, + sizeof(XF86VideoAdaptorRec) + + sizeof(struct sna_video) + + sizeof(DevUnion)))) + return NULL; + + adaptor->type = XvWindowMask | XvInputMask | XvImageMask; + adaptor->flags = VIDEO_OVERLAID_IMAGES /*| VIDEO_CLIP_TO_VIEWPORT */ ; + adaptor->name = "Intel(R) Video Overlay"; + adaptor->nEncodings = 1; + adaptor->pEncodings = xnfalloc(sizeof(DummyEncoding)); + memcpy(adaptor->pEncodings, DummyEncoding, sizeof(DummyEncoding)); + if (IS_845G(sna) || IS_I830(sna)) { + adaptor->pEncodings->width = IMAGE_MAX_WIDTH_LEGACY; + adaptor->pEncodings->height = IMAGE_MAX_HEIGHT_LEGACY; + } + adaptor->nFormats = NUM_FORMATS; + adaptor->pFormats = Formats; + adaptor->nPorts = 1; + adaptor->pPortPrivates = (DevUnion *)&adaptor[1]; + + video = (struct sna_video *)&adaptor->pPortPrivates[1]; + + adaptor->pPortPrivates[0].ptr = video; + adaptor->nAttributes = NUM_ATTRIBUTES; + if (HAS_GAMMA(sna)) + adaptor->nAttributes += GAMMA_ATTRIBUTES; + adaptor->pAttributes = + xnfalloc(sizeof(XF86AttributeRec) * adaptor->nAttributes); + /* Now copy the attributes */ + att = adaptor->pAttributes; + memcpy(att, Attributes, sizeof(XF86AttributeRec) * NUM_ATTRIBUTES); + att += NUM_ATTRIBUTES; + if (HAS_GAMMA(sna)) { + memcpy(att, GammaAttributes, + sizeof(XF86AttributeRec) * GAMMA_ATTRIBUTES); + att += GAMMA_ATTRIBUTES; + } + adaptor->nImages = NUM_IMAGES; + adaptor->pImages = Images; + adaptor->PutVideo = NULL; + adaptor->PutStill = NULL; + adaptor->GetVideo = NULL; + adaptor->GetStill = NULL; + adaptor->StopVideo = sna_video_overlay_stop; + adaptor->SetPortAttribute = sna_video_overlay_set_port_attribute; + adaptor->GetPortAttribute = sna_video_overlay_get_port_attribute; + adaptor->QueryBestSize = sna_video_overlay_query_best_size; + adaptor->PutImage = 
sna_video_overlay_put_image; + adaptor->QueryImageAttributes = sna_video_overlay_query_video_attributes; + + video->textured = FALSE; + video->color_key = sna_video_overlay_color_key(sna); + video->brightness = -19; /* (255/219) * -16 */ + video->contrast = 75; /* 255/219 * 64 */ + video->saturation = 146; /* 128/112 * 128 */ + video->desired_crtc = NULL; + video->gamma5 = 0xc0c0c0; + video->gamma4 = 0x808080; + video->gamma3 = 0x404040; + video->gamma2 = 0x202020; + video->gamma1 = 0x101010; + video->gamma0 = 0x080808; + + video->rotation = RR_Rotate_0; + + /* gotta uninit this someplace */ + REGION_NULL(screen, &video->clip); + + xvColorKey = MAKE_ATOM("XV_COLORKEY"); + xvBrightness = MAKE_ATOM("XV_BRIGHTNESS"); + xvContrast = MAKE_ATOM("XV_CONTRAST"); + xvSaturation = MAKE_ATOM("XV_SATURATION"); + + /* Allow the pipe to be switched from pipe A to B when in clone mode */ + xvPipe = MAKE_ATOM("XV_PIPE"); + + if (HAS_GAMMA(sna)) { + xvGamma0 = MAKE_ATOM("XV_GAMMA0"); + xvGamma1 = MAKE_ATOM("XV_GAMMA1"); + xvGamma2 = MAKE_ATOM("XV_GAMMA2"); + xvGamma3 = MAKE_ATOM("XV_GAMMA3"); + xvGamma4 = MAKE_ATOM("XV_GAMMA4"); + xvGamma5 = MAKE_ATOM("XV_GAMMA5"); + } + + sna_video_overlay_update_attrs(sna, video); + + return adaptor; +} diff --git a/src/sna/sna_video_textured.c b/src/sna/sna_video_textured.c new file mode 100644 index 00000000..66c70d4a --- /dev/null +++ b/src/sna/sna_video_textured.c @@ -0,0 +1,428 @@ +/*************************************************************************** + + Copyright 2000-2011 Intel Corporation. All Rights Reserved. 
+ + Permission is hereby granted, free of charge, to any person obtaining a + copy of this software and associated documentation files (the + "Software"), to deal in the Software without restriction, including + without limitation the rights to use, copy, modify, merge, publish, + distribute, sub license, and/or sell copies of the Software, and to + permit persons to whom the Software is furnished to do so, subject to + the following conditions: + + The above copyright notice and this permission notice (including the + next paragraph) shall be included in all copies or substantial portions + of the Software. + + THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS + OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. + IN NO EVENT SHALL INTEL, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, + DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR + OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR + THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
+ + **************************************************************************/ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_video.h" + +#include <xf86xv.h> +#include <X11/extensions/Xv.h> + +#ifdef SNA_XVMC +#define _SNA_XVMC_SERVER_ +#include "sna_video_hwmc.h" +#endif + +#if DEBUG_VIDEO_TEXTURED +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +#define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE) + +static Atom xvBrightness, xvContrast, xvSyncToVblank; + +/* client libraries expect an encoding */ +static const XF86VideoEncodingRec DummyEncoding[1] = { + { + 0, + "XV_IMAGE", + 8192, 8192, + {1, 1} + } +}; + +#define NUM_FORMATS 3 + +static XF86VideoFormatRec Formats[NUM_FORMATS] = { + {15, TrueColor}, {16, TrueColor}, {24, TrueColor} +}; + +#define NUM_TEXTURED_ATTRIBUTES 3 +static XF86AttributeRec TexturedAttributes[NUM_TEXTURED_ATTRIBUTES] = { + {XvSettable | XvGettable, -128, 127, "XV_BRIGHTNESS"}, + {XvSettable | XvGettable, 0, 255, "XV_CONTRAST"}, + {XvSettable | XvGettable, -1, 1, "XV_SYNC_TO_VBLANK"}, +}; + +#ifdef SNA_XVMC +#define NUM_IMAGES 5 +#define XVMC_IMAGE 1 +#else +#define NUM_IMAGES 4 +#define XVMC_IMAGE 0 +#endif + +static XF86ImageRec Images[NUM_IMAGES] = { + XVIMAGE_YUY2, + XVIMAGE_YV12, + XVIMAGE_I420, + XVIMAGE_UYVY, +#ifdef SNA_XVMC + { + /* + * Below, a dummy picture type that is used in XvPutImage + * only to do an overlay update. + * Introduced for the XvMC client lib. + * Defined to have a zero data size. 
+ */ + FOURCC_XVMC, + XvYUV, + LSBFirst, + {'X', 'V', 'M', 'C', + 0x00, 0x00, 0x00, 0x10, 0x80, 0x00, 0x00, 0xAA, 0x00, + 0x38, 0x9B, 0x71}, + 12, + XvPlanar, + 3, + 0, 0, 0, 0, + 8, 8, 8, + 1, 2, 2, + 1, 2, 2, + {'Y', 'V', 'U', + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, + XvTopToBottom}, +#endif +}; + +static int xvmc_passthrough(int id) +{ +#ifdef SNA_XVMC + return id == FOURCC_XVMC; +#else + return 0; +#endif +} + +static void sna_video_textured_stop(ScrnInfoPtr scrn, + pointer data, + Bool shutdown) +{ + struct sna *sna = to_sna(scrn); + struct sna_video *video = data; + + DBG(("%s()\n", __FUNCTION__)); + + REGION_EMPTY(scrn->pScreen, &video->clip); + + if (!shutdown) + return; + + sna_video_free_buffers(sna, video); +} + +static int +sna_video_textured_set_attribute(ScrnInfoPtr scrn, + Atom attribute, + INT32 value, + pointer data) +{ + struct sna_video *video = data; + + if (attribute == xvBrightness) { + if (value < -128 || value > 127) + return BadValue; + + video->brightness = value; + } else if (attribute == xvContrast) { + if (value < 0 || value > 255) + return BadValue; + + video->contrast = value; + } else if (attribute == xvSyncToVblank) { + if (value < -1 || value > 1) + return BadValue; + + video->SyncToVblank = value; + } else + return BadMatch; + + return Success; +} + +static int +sna_video_textured_get_attribute(ScrnInfoPtr scrn, + Atom attribute, + INT32 *value, + pointer data) +{ + struct sna_video *video = data; + + if (attribute == xvBrightness) + *value = video->brightness; + else if (attribute == xvContrast) + *value = video->contrast; + else if (attribute == xvSyncToVblank) + *value = video->SyncToVblank; + else + return BadMatch; + + return Success; +} + +static void +sna_video_textured_best_size(ScrnInfoPtr scrn, + Bool motion, + short vid_w, short vid_h, + short drw_w, short drw_h, + unsigned int *p_w, + unsigned int *p_h, + pointer data) +{ + if (vid_w > (drw_w << 1)) + drw_w = vid_w >> 
1; + if (vid_h > (drw_h << 1)) + drw_h = vid_h >> 1; + + *p_w = drw_w; + *p_h = drw_h; +} + +/* + * The source rectangle of the video is defined by (src_x, src_y, src_w, src_h). + * The dest rectangle of the video is defined by (drw_x, drw_y, drw_w, drw_h). + * id is a fourcc code for the format of the video. + * buf is the pointer to the source data in system memory. + * width and height are the w/h of the source data. + * If "sync" is TRUE, then we must be finished with *buf at the point of return + * (which we always are). + * clip is the clipping region in screen space. + * data is a pointer to our port private. + * drawable is some Drawable, which might not be the screen in the case of + * compositing. It's a new argument to the function in the 1.1 server. + */ +static int +sna_video_textured_put_image(ScrnInfoPtr scrn, + short src_x, short src_y, + short drw_x, short drw_y, + short src_w, short src_h, + short drw_w, short drw_h, + int id, unsigned char *buf, + short width, short height, + Bool sync, RegionPtr clip, pointer data, + DrawablePtr drawable) +{ + struct sna *sna = to_sna(scrn); + struct sna_video *video = data; + struct sna_video_frame frame; + PixmapPtr pixmap = get_drawable_pixmap(drawable); + BoxRec dstBox; + xf86CrtcPtr crtc; + int top, left, npixels, nlines; + + if (!sna_video_clip_helper(scrn, video, &crtc, &dstBox, + src_x, src_y, drw_x, drw_y, + src_w, src_h, drw_w, drw_h, + id, + &top, &left, &npixels, &nlines, + clip, width, height)) + return Success; + + sna_video_frame_init(sna, video, id, width, height, &frame); + + if (xvmc_passthrough(id)) { + if (IS_I915G(sna) || IS_I915GM(sna)) { + /* XXX: i915 is not support and needs some + * serious care. 
grep for KMS in i915_hwmc.c */ + return BadAlloc; + } + + frame.bo = kgem_create_for_name(&sna->kgem, *(uint32_t*)buf); + if (frame.bo == NULL) + return BadAlloc; + } else { + if (!sna_video_copy_data(sna, video, &frame, + top, left, npixels, nlines, + buf)) + return BadAlloc; + } + + if (crtc && video->SyncToVblank != 0) + sna_wait_for_scanline(sna, pixmap, crtc, clip); + + sna->render.video(sna, video, &frame, clip, + src_w, src_h, + drw_w, drw_h, + pixmap); + + sna_video_frame_fini(sna, video, &frame); + + DamageDamageRegion(drawable, clip); + + /* Push the frame to the GPU as soon as possible so + * we can hit the next vsync. + */ + kgem_submit(&sna->kgem); + + return Success; +} + +static int +sna_video_textured_query(ScrnInfoPtr scrn, + int id, + unsigned short *w, unsigned short *h, + int *pitches, int *offsets) +{ + int size, tmp; + + if (*w > 8192) + *w = 8192; + if (*h > 8192) + *h = 8192; + + *w = (*w + 1) & ~1; + if (offsets) + offsets[0] = 0; + + switch (id) { + /* IA44 is for XvMC only */ + case FOURCC_IA44: + case FOURCC_AI44: + if (pitches) + pitches[0] = *w; + size = *w * *h; + break; + case FOURCC_YV12: + case FOURCC_I420: + *h = (*h + 1) & ~1; + size = (*w + 3) & ~3; + if (pitches) + pitches[0] = size; + size *= *h; + if (offsets) + offsets[1] = size; + tmp = ((*w >> 1) + 3) & ~3; + if (pitches) + pitches[1] = pitches[2] = tmp; + tmp *= (*h >> 1); + size += tmp; + if (offsets) + offsets[2] = size; + size += tmp; + break; + case FOURCC_UYVY: + case FOURCC_YUY2: + default: + size = *w << 1; + if (pitches) + pitches[0] = size; + size *= *h; + break; +#ifdef SNA_XVMC + case FOURCC_XVMC: + *h = (*h + 1) & ~1; + size = sizeof(struct sna_xvmc_command); + if (pitches) + pitches[0] = size; + break; +#endif + } + + return size; +} + +XF86VideoAdaptorPtr sna_video_textured_setup(struct sna *sna, + ScreenPtr screen) +{ + XF86VideoAdaptorPtr adaptor; + XF86AttributePtr attrs; + struct sna_video *video; + DevUnion *devUnions; + int nports = 16, i; + + if 
(!sna->render.video) { + xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING, + "Textured video not supported on this hardware\n"); + return NULL; + } + + adaptor = calloc(1, sizeof(XF86VideoAdaptorRec)); + video = calloc(nports, sizeof(struct sna_video)); + devUnions = calloc(nports, sizeof(DevUnion)); + attrs = calloc(NUM_TEXTURED_ATTRIBUTES, sizeof(XF86AttributeRec)); + if (adaptor == NULL || + video == NULL || + devUnions == NULL || + attrs == NULL) { + free(adaptor); + free(video); + free(devUnions); + free(attrs); + return NULL; + } + + adaptor->type = XvWindowMask | XvInputMask | XvImageMask; + adaptor->flags = 0; + adaptor->name = "Intel(R) Textured Video"; + adaptor->nEncodings = 1; + adaptor->pEncodings = xnfalloc(sizeof(DummyEncoding)); + memcpy(adaptor->pEncodings, DummyEncoding, sizeof(DummyEncoding)); + adaptor->nFormats = NUM_FORMATS; + adaptor->pFormats = Formats; + adaptor->nPorts = nports; + adaptor->pPortPrivates = devUnions; + adaptor->nAttributes = NUM_TEXTURED_ATTRIBUTES; + adaptor->pAttributes = attrs; + memcpy(attrs, TexturedAttributes, + NUM_TEXTURED_ATTRIBUTES * sizeof(XF86AttributeRec)); + adaptor->nImages = NUM_IMAGES; + adaptor->pImages = Images; + adaptor->PutVideo = NULL; + adaptor->PutStill = NULL; + adaptor->GetVideo = NULL; + adaptor->GetStill = NULL; + adaptor->StopVideo = sna_video_textured_stop; + adaptor->SetPortAttribute = sna_video_textured_set_attribute; + adaptor->GetPortAttribute = sna_video_textured_get_attribute; + adaptor->QueryBestSize = sna_video_textured_best_size; + adaptor->PutImage = sna_video_textured_put_image; + adaptor->QueryImageAttributes = sna_video_textured_query; + + for (i = 0; i < nports; i++) { + struct sna_video *v = &video[i]; + + v->textured = TRUE; + v->rotation = RR_Rotate_0; + v->SyncToVblank = 1; + + /* gotta uninit this someplace, XXX: shouldn't be necessary for textured */ + RegionNull(&v->clip); + + adaptor->pPortPrivates[i].ptr = v; + } + + xvBrightness = MAKE_ATOM("XV_BRIGHTNESS"); + xvContrast = 
MAKE_ATOM("XV_CONTRAST"); + xvSyncToVblank = MAKE_ATOM("XV_SYNC_TO_VBLANK"); + + return adaptor; +} |