summaryrefslogtreecommitdiff
path: root/src/sna/sna_video.c
diff options
context:
space:
mode:
authorChris Wilson <chris@chris-wilson.co.uk>2011-04-08 07:17:14 +0100
committerChris Wilson <chris@chris-wilson.co.uk>2011-06-04 09:19:46 +0100
commitbcef98af561939aa48d9236b2dfa2c5626adf4cb (patch)
tree9d05558947a97595a6fdece968b50eeae45bbfb1 /src/sna/sna_video.c
parent340cfb7f5271fd1df4c8948e5c9336f5b69a6e6c (diff)
sna: Introduce a new acceleration model.
The premise is that switching between rings (i.e. the BLT and RENDER rings) on SandyBridge imposes a large latency overhead whilst rendering. The cause is that in order to switch rings, we need to split the batch earlier than is desired and to add serialisation between the rings. Both of which incur large overhead. By switching to using a pure 3D blit engine (ok, not so pure as the BLT engine still has uses for the core drawing model which can not be easily represented without a combinatorial explosion of shaders) we can take advantage of additional efficiencies, such as relative relocations, that have been incorporated into recent hardware advances. However, even older hardware performs better from avoiding the implicit context switches and from the batching efficiency of the 3D pipeline... But this is X, and PolyGlyphBlt still exists and remains in use. So for the operations that are not worth accelerating in hardware, we introduce a shadow buffer mechanism through out and reintroduce pixmap migration. Doing this efficiently is the cornerstone of ensuring that we do exploit the increased potential of recent hardware for running old applications and environments (i.e. so that the latest and greatest chip is actually faster than gen2!) For the curious, sna is SandyBridge's New Acceleration. If you are running older chipsets and welcome the performance increase offered by this patch, then you may choose to call it Snazzy instead. Speedups ======== gen3 firefox-fishtank 1203584.56 (1203842.75 0.01%) -> 85561.71 (125146.44 14.87%): 14.07x speedup gen5 grads-heat-map 3385.42 (3489.73 1.44%) -> 350.29 (350.75 0.18%): 9.66x speedup gen3 xfce4-terminal-a1 4179.02 (4180.09 0.06%) -> 503.90 (531.88 4.48%): 8.29x speedup gen4 grads-heat-map 2458.66 (2826.34 4.64%) -> 348.82 (349.20 0.29%): 7.05x speedup gen3 grads-heat-map 1443.33 (1445.32 0.09%) -> 298.55 (298.76 0.05%): 4.83x speedup gen3 swfdec-youtube 3836.14 (3894.14 0.95%) -> 889.84 (979.56 5.99%): 4.31x speedup gen6 grads-heat-map 742.11 (744.44 0.15%) -> 172.51 (172.93 0.20%): 4.30x speedup gen3 firefox-talos-svg 71740.44 (72370.13 0.59%) -> 21959.29 (21995.09 0.68%): 3.27x speedup gen5 gvim 8045.51 (8071.47 0.17%) -> 2589.38 (3246.78 10.74%): 3.11x speedup gen6 poppler 3800.78 (3817.92 0.24%) -> 1227.36 (1230.12 0.30%): 3.10x speedup gen6 gnome-terminal-vim 9106.84 (9111.56 0.03%) -> 3459.49 (3478.52 0.25%): 2.63x speedup gen5 midori-zoomed 9564.53 (9586.58 0.17%) -> 3677.73 (3837.02 2.02%): 2.60x speedup gen5 gnome-terminal-vim 38167.25 (38215.82 0.08%) -> 14901.09 (14902.28 0.01%): 2.56x speedup gen5 poppler 13575.66 (13605.04 0.16%) -> 5554.27 (5555.84 0.01%): 2.44x speedup gen5 swfdec-giant-steps 8941.61 (8988.72 0.52%) -> 3851.98 (3871.01 0.93%): 2.32x speedup gen5 xfce4-terminal-a1 18956.60 (18986.90 0.07%) -> 8362.75 (8365.70 0.01%): 2.27x speedup gen5 firefox-fishtank 88750.31 (88858.23 0.14%) -> 39164.57 (39835.54 0.80%): 2.27x speedup gen3 midori-zoomed 2392.13 (2397.82 0.14%) -> 1109.96 (1303.10 30.35%): 2.16x speedup gen6 gvim 2510.34 (2513.34 0.20%) -> 1200.76 (1204.30 0.22%): 2.09x speedup gen5 firefox-planet-gnome 40478.16 (40565.68 0.09%) -> 19606.22 (19648.79 0.16%): 2.06x speedup gen5 gnome-system-monitor 10344.47 (10385.62 0.29%) -> 5136.69 (5256.85 1.15%): 2.01x speedup gen3 poppler 2595.23 (2603.10 0.17%) -> 1297.56 (1302.42 0.61%): 2.00x speedup gen6 firefox-talos-gfx 7184.03 (7194.97 0.13%) -> 3806.31 (3811.66 0.06%): 1.89x speedup gen5 evolution 8739.25 (8766.12 0.27%) -> 4817.54 (5050.96 1.54%): 1.81x speedup gen3 evolution 1684.06 (1696.88 0.35%) -> 1004.99 (1008.55 0.85%): 1.68x speedup gen3 gnome-terminal-vim 4285.13 (4287.68 0.04%) -> 2715.97 (3202.17 13.52%): 1.58x speedup gen5 swfdec-youtube 5843.94 (5951.07 0.91%) -> 3810.86 (3826.04 1.32%): 1.53x speedup gen4 poppler 7496.72 (7558.83 0.58%) -> 5125.08 (5247.65 1.44%): 1.46x speedup gen4 gnome-terminal-vim 21126.24 (21292.08 0.85%) -> 14590.25 (15066.33 1.80%): 1.45x speedup gen5 firefox-talos-svg 99873.69 (100300.95 0.37%) -> 70745.66 (70818.86 0.05%): 1.41x speedup gen4 firefox-planet-gnome 28205.10 (28304.45 0.27%) -> 19996.11 (20081.44 0.56%): 1.41x speedup gen5 firefox-talos-gfx 93070.85 (93194.72 0.10%) -> 67687.93 (70374.37 1.30%): 1.37x speedup gen4 evolution 6696.25 (6854.14 0.85%) -> 4958.62 (5027.73 0.85%): 1.35x speedup gen3 swfdec-giant-steps 2538.03 (2539.30 0.04%) -> 1895.71 (2050.62 62.43%): 1.34x speedup gen4 gvim 4356.18 (4422.78 0.70%) -> 3276.31 (3281.69 0.13%): 1.33x speedup gen6 evolution 1242.13 (1245.44 0.72%) -> 953.76 (954.54 0.07%): 1.30x speedup gen6 firefox-planet-gnome 4554.23 (4560.69 0.08%) -> 3758.76 (3768.97 0.28%): 1.21x speedup gen3 firefox-talos-gfx 6264.13 (6284.65 0.30%) -> 5261.56 (5370.87 1.28%): 1.19x speedup gen4 midori-zoomed 4771.13 (4809.90 0.73%) -> 4037.03 (4118.93 0.85%): 1.18x speedup gen6 swfdec-giant-steps 1557.06 (1560.13 0.12%) -> 1336.34 (1341.29 0.32%): 1.17x speedup gen4 firefox-talos-gfx 80767.28 (80986.31 0.17%) -> 69629.08 (69721.71 0.06%): 1.16x speedup gen6 midori-zoomed 1463.70 (1463.76 0.08%) -> 1331.45 (1336.56 0.22%): 1.10x speedup Slowdowns ========= gen6 xfce4-terminal-a1 2030.25 (2036.23 0.25%) -> 2144.60 (2240.31 4.29%): 1.06x slowdown gen4 swfdec-youtube 3580.00 (3597.23 3.92%) -> 3826.90 (3862.24 0.91%): 1.07x slowdown gen4 firefox-talos-svg 66112.25 (66256.51 0.11%) -> 71433.40 (71584.31 0.14%): 1.08x slowdown gen4 gnome-system-monitor 5691.60 (5724.03 0.56%) -> 6707.56 (6747.83 0.33%): 1.18x slowdown gen3 ocitysmap 3494.05 (3502.44 0.20%) -> 4321.99 (4524.42 2.78%): 1.24x slowdown gen4 ocitysmap 3628.42 (3641.66 9.37%) -> 5177.16 (5828.74 8.38%): 1.43x slowdown gen5 ocitysmap 4027.77 (4068.11 0.80%) -> 5748.26 (6282.25 7.38%): 1.43x slowdown gen6 ocitysmap 1401.61 (1402.24 0.40%) -> 2365.74 (2379.14 4.12%): 1.69x slowdown [Note the performance regression for ocitysmap comes from that we now attempt to support rendering to and (more importantly) from large surfaces. By enabling such operations is the only way to one day be faster than purely using the CPU, in the meantime we suffer regression due to the increased migration and aperture thrashing. The other couple of regressions will be eliminated with improved span and shader support, now that the framework for such is in place.] The performance increase for Cairo completely overlooks the other critical aspects of the architecture: World of Padman: gen3 (800x600): 57.5 -> 96.2 gen4 (800x600): 47.8 -> 74.6 gen6 (1366x768): 100.4 -> 140.3 [F15] 144.3 -> 146.4 [drm-intel-next] x11perf (gen6); aa10text: 3.47 -> 14.3 Mglyphs/s [unthrottled!] copywinwin10: 1.66 -> 1.99 Mops/s copywinpix10: 2.28 -> 2.98 Mops/s And we do not have a good measure for how much improvement the reworking of the fallback paths give, except that xterm is now over 4x faster... PS: This depends upon the Xorg patchset "Remove the cacheing of the last scratch PixmapRec" for correct invalidations of scratch Pixmaps (used by the dix to implement SHM operations, used by chromium and gtk+ pixbufs. PPS: ./configure --enable-sna Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/sna/sna_video.c')
-rw-r--r--src/sna/sna_video.c737
1 files changed, 737 insertions, 0 deletions
diff --git a/src/sna/sna_video.c b/src/sna/sna_video.c
new file mode 100644
index 00000000..b6cbda22
--- /dev/null
+++ b/src/sna/sna_video.c
@@ -0,0 +1,737 @@
+/***************************************************************************
+
+ Copyright 2000 Intel Corporation. All Rights Reserved.
+
+ Permission is hereby granted, free of charge, to any person obtaining a
+ copy of this software and associated documentation files (the
+ "Software"), to deal in the Software without restriction, including
+ without limitation the rights to use, copy, modify, merge, publish,
+ distribute, sub license, and/or sell copies of the Software, and to
+ permit persons to whom the Software is furnished to do so, subject to
+ the following conditions:
+
+ The above copyright notice and this permission notice (including the
+ next paragraph) shall be included in all copies or substantial portions
+ of the Software.
+
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ IN NO EVENT SHALL INTEL, AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR
+ THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+
+ **************************************************************************/
+
+/*
+ * i830_video.c: i830/i845 Xv driver.
+ *
+ * Copyright © 2002 by Alan Hourihane and David Dawes
+ *
+ * Authors:
+ * Alan Hourihane <alanh@tungstengraphics.com>
+ * David Dawes <dawes@xfree86.org>
+ *
+ * Derived from i810 Xv driver:
+ *
+ * Authors of i810 code:
+ * Jonathan Bian <jonathan.bian@intel.com>
+ * Offscreen Images:
+ * Matt Sottek <matthew.j.sottek@intel.com>
+ */
+
+#ifdef HAVE_CONFIG_H
+#include "config.h"
+#endif
+
+#include <inttypes.h>
+#include <math.h>
+#include <string.h>
+#include <assert.h>
+#include <errno.h>
+
+#include <sys/mman.h>
+
+#include "sna.h"
+#include "sna_reg.h"
+#include "sna_video.h"
+
+#include <xf86xv.h>
+#include <X11/extensions/Xv.h>
+
+#ifdef SNA_XVMC
+#define _SNA_XVMC_SERVER_
+#include "sna_video_hwmc.h"
+#else
+static inline Bool sna_video_xvmc_setup(struct sna *sna,
+ ScreenPtr ptr,
+ XF86VideoAdaptorPtr target)
+{
+ return FALSE;
+}
+#endif
+
+#if DEBUG_VIDEO_TEXTURED
+#undef DBG
+#define DBG(x) ErrorF x
+#endif
+
+void sna_video_free_buffers(struct sna *sna, struct sna_video *video)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(video->old_buf); i++) {
+ if (video->old_buf[i]) {
+ kgem_bo_destroy(&sna->kgem, video->old_buf[i]);
+ video->old_buf[i] = NULL;
+ }
+ }
+
+ if (video->buf) {
+ kgem_bo_destroy(&sna->kgem, video->buf);
+ video->buf = NULL;
+ }
+}
+
+void sna_video_frame_fini(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame)
+{
+ struct kgem_bo *bo;
+
+ if (!frame->bo->reusable) {
+ kgem_bo_destroy(&sna->kgem, frame->bo);
+ return;
+ }
+
+ bo = video->old_buf[1];
+ video->old_buf[1] = video->old_buf[0];
+ video->old_buf[0] = video->buf;
+ video->buf = bo;
+}
+
+Bool
+sna_video_clip_helper(ScrnInfoPtr scrn,
+ struct sna_video *video,
+ xf86CrtcPtr * crtc_ret,
+ BoxPtr dst,
+ short src_x, short src_y,
+ short drw_x, short drw_y,
+ short src_w, short src_h,
+ short drw_w, short drw_h,
+ int id,
+ int *top, int* left, int* npixels, int *nlines,
+ RegionPtr reg, INT32 width, INT32 height)
+{
+ Bool ret;
+ RegionRec crtc_region_local;
+ RegionPtr crtc_region = reg;
+ BoxRec crtc_box;
+ INT32 x1, x2, y1, y2;
+ xf86CrtcPtr crtc;
+
+ x1 = src_x;
+ x2 = src_x + src_w;
+ y1 = src_y;
+ y2 = src_y + src_h;
+
+ dst->x1 = drw_x;
+ dst->x2 = drw_x + drw_w;
+ dst->y1 = drw_y;
+ dst->y2 = drw_y + drw_h;
+
+ /*
+ * For overlay video, compute the relevant CRTC and
+ * clip video to that
+ */
+ crtc = sna_covering_crtc(scrn, dst, video->desired_crtc,
+ &crtc_box);
+
+ /* For textured video, we don't actually want to clip at all. */
+ if (crtc && !video->textured) {
+ RegionInit(&crtc_region_local, &crtc_box, 1);
+ crtc_region = &crtc_region_local;
+ RegionIntersect(crtc_region, crtc_region, reg);
+ }
+ *crtc_ret = crtc;
+
+ ret = xf86XVClipVideoHelper(dst, &x1, &x2, &y1, &y2,
+ crtc_region, width, height);
+ if (crtc_region != reg)
+ RegionUninit(&crtc_region_local);
+
+ *top = y1 >> 16;
+ *left = (x1 >> 16) & ~1;
+ *npixels = ALIGN(((x2 + 0xffff) >> 16), 2) - *left;
+ if (is_planar_fourcc(id)) {
+ *top &= ~1;
+ *nlines = ALIGN(((y2 + 0xffff) >> 16), 2) - *top;
+ } else
+ *nlines = ((y2 + 0xffff) >> 16) - *top;
+
+ return ret;
+}
+
+void
+sna_video_frame_init(struct sna *sna,
+ struct sna_video *video,
+ int id, short width, short height,
+ struct sna_video_frame *frame)
+{
+ int align;
+
+ frame->id = id;
+ frame->width = width;
+ frame->height = height;
+
+ /* Only needs to be DWORD-aligned for textured on i915, but overlay has
+ * stricter requirements.
+ */
+ if (video->textured) {
+ align = 4;
+ } else {
+ if (sna->kgem.gen >= 40)
+ /* Actually the alignment is 64 bytes, too. But the
+ * stride must be at least 512 bytes. Take the easy fix
+ * and align on 512 bytes unconditionally. */
+ align = 512;
+ else if (IS_I830(sna) || IS_845G(sna))
+ /* Harsh, errata on these chipsets limit the stride
+ * to be a multiple of 256 bytes.
+ */
+ align = 256;
+ else
+ align = 64;
+ }
+
+#if SNA_XVMC
+ /* for i915 xvmc, hw requires 1kb aligned surfaces */
+ if (id == FOURCC_XVMC && sna->kgem.gen < 40)
+ align = 1024;
+#endif
+
+
+ /* Determine the desired destination pitch (representing the chroma's pitch,
+ * in the planar case.
+ */
+ if (is_planar_fourcc(id)) {
+ if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
+ frame->pitch[0] = ALIGN((height / 2), align);
+ frame->pitch[1] = ALIGN(height, align);
+ frame->size = frame->pitch[0] * width * 3;
+ } else {
+ frame->pitch[0] = ALIGN((width / 2), align);
+ frame->pitch[1] = ALIGN(width, align);
+ frame->size = frame->pitch[0] * height * 3;
+ }
+ } else {
+ if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
+ frame->pitch[0] = ALIGN((height << 1), align);
+ frame->size = frame->pitch[0] * width;
+ } else {
+ frame->pitch[0] = ALIGN((width << 1), align);
+ frame->size = frame->pitch[0] * height;
+ }
+ frame->pitch[1] = 0;
+ }
+
+ frame->YBufOffset = 0;
+
+ if (video->rotation & (RR_Rotate_90 | RR_Rotate_270)) {
+ frame->UBufOffset =
+ frame->YBufOffset + frame->pitch[1] * width;
+ frame->VBufOffset =
+ frame->UBufOffset + frame->pitch[0] * width / 2;
+ } else {
+ frame->UBufOffset =
+ frame->YBufOffset + frame->pitch[1] * height;
+ frame->VBufOffset =
+ frame->UBufOffset + frame->pitch[0] * height / 2;
+ }
+}
+
+static struct kgem_bo *
+sna_video_buffer(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame)
+{
+ /* Free the current buffer if we're going to have to reallocate */
+ if (video->buf && video->buf->size < frame->size)
+ sna_video_free_buffers(sna, video);
+
+ if (video->buf == NULL)
+ video->buf = kgem_create_linear(&sna->kgem, frame->size);
+
+ return video->buf;
+}
+
+static void sna_memcpy_plane(unsigned char *dst, unsigned char *src,
+ int height, int width,
+ int dstPitch, int srcPitch, Rotation rotation)
+{
+ int i, j = 0;
+ unsigned char *s;
+
+ switch (rotation) {
+ case RR_Rotate_0:
+ /* optimise for the case of no clipping */
+ if (srcPitch == dstPitch && srcPitch == width)
+ memcpy(dst, src, srcPitch * height);
+ else
+ for (i = 0; i < height; i++) {
+ memcpy(dst, src, width);
+ src += srcPitch;
+ dst += dstPitch;
+ }
+ break;
+ case RR_Rotate_90:
+ for (i = 0; i < height; i++) {
+ s = src;
+ for (j = 0; j < width; j++) {
+ dst[(i) + ((width - j - 1) * dstPitch)] = *s++;
+ }
+ src += srcPitch;
+ }
+ break;
+ case RR_Rotate_180:
+ for (i = 0; i < height; i++) {
+ s = src;
+ for (j = 0; j < width; j++) {
+ dst[(width - j - 1) +
+ ((height - i - 1) * dstPitch)] = *s++;
+ }
+ src += srcPitch;
+ }
+ break;
+ case RR_Rotate_270:
+ for (i = 0; i < height; i++) {
+ s = src;
+ for (j = 0; j < width; j++) {
+ dst[(height - i - 1) + (j * dstPitch)] = *s++;
+ }
+ src += srcPitch;
+ }
+ break;
+ }
+}
+
+static void
+sna_copy_planar_data(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ unsigned char *buf,
+ unsigned char *dst,
+ int srcPitch, int srcPitch2,
+ int srcH, int top, int left)
+{
+ unsigned char *src1, *src2, *src3, *dst1, *dst2, *dst3;
+
+ /* Copy Y data */
+ src1 = buf + (top * srcPitch) + left;
+
+ dst1 = dst + frame->YBufOffset;
+
+ sna_memcpy_plane(dst1, src1,
+ frame->height, frame->width,
+ frame->pitch[1], srcPitch,
+ video->rotation);
+
+ /* Copy V data for YV12, or U data for I420 */
+ src2 = buf + /* start of YUV data */
+ (srcH * srcPitch) + /* move over Luma plane */
+ ((top >> 1) * srcPitch2) + /* move down from by top lines */
+ (left >> 1); /* move left by left pixels */
+
+ if (frame->id == FOURCC_I420)
+ dst2 = dst + frame->UBufOffset;
+ else
+ dst2 = dst + frame->VBufOffset;
+
+ sna_memcpy_plane(dst2, src2,
+ frame->height / 2, frame->width / 2,
+ frame->pitch[0], srcPitch2,
+ video->rotation);
+
+ /* Copy U data for YV12, or V data for I420 */
+ src3 = buf + /* start of YUV data */
+ (srcH * srcPitch) + /* move over Luma plane */
+ ((srcH >> 1) * srcPitch2) + /* move over Chroma plane */
+ ((top >> 1) * srcPitch2) + /* move down from by top lines */
+ (left >> 1); /* move left by left pixels */
+ if (frame->id == FOURCC_I420)
+ dst3 = dst + frame->VBufOffset;
+ else
+ dst3 = dst + frame->UBufOffset;
+
+ sna_memcpy_plane(dst3, src3,
+ frame->height / 2, frame->width / 2,
+ frame->pitch[0], srcPitch2,
+ video->rotation);
+}
+
+static void
+sna_copy_packed_data(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ unsigned char *buf,
+ unsigned char *dst,
+ int srcPitch,
+ int top, int left)
+{
+ unsigned char *src;
+ unsigned char *s;
+ int i, j;
+
+ src = buf + (top * srcPitch) + (left << 1);
+
+ dst += frame->YBufOffset;
+
+ switch (video->rotation) {
+ case RR_Rotate_0:
+ frame->width <<= 1;
+ for (i = 0; i < frame->height; i++) {
+ memcpy(dst, src, frame->width);
+ src += srcPitch;
+ dst += frame->pitch[0];
+ }
+ break;
+ case RR_Rotate_90:
+ frame->height <<= 1;
+ for (i = 0; i < frame->height; i += 2) {
+ s = src;
+ for (j = 0; j < frame->width; j++) {
+ /* Copy Y */
+ dst[(i + 0) + ((frame->width - j - 1) * frame->pitch[0])] = *s++;
+ (void)*s++;
+ }
+ src += srcPitch;
+ }
+ frame->height >>= 1;
+ src = buf + (top * srcPitch) + (left << 1);
+ for (i = 0; i < frame->height; i += 2) {
+ for (j = 0; j < frame->width; j += 2) {
+ /* Copy U */
+ dst[((i * 2) + 1) + ((frame->width - j - 1) * frame->pitch[0])] =
+ src[(j * 2) + 1 + (i * srcPitch)];
+ dst[((i * 2) + 1) + ((frame->width - j - 2) * frame->pitch[0])] =
+ src[(j * 2) + 1 + ((i + 1) * srcPitch)];
+ /* Copy V */
+ dst[((i * 2) + 3) + ((frame->width - j - 1) * frame->pitch[0])] =
+ src[(j * 2) + 3 + (i * srcPitch)];
+ dst[((i * 2) + 3) + ((frame->width - j - 2) * frame->pitch[0])] =
+ src[(j * 2) + 3 + ((i + 1) * srcPitch)];
+ }
+ }
+ break;
+ case RR_Rotate_180:
+ frame->width <<= 1;
+ for (i = 0; i < frame->height; i++) {
+ s = src;
+ for (j = 0; j < frame->width; j += 4) {
+ dst[(frame->width - j - 4) + ((frame->height - i - 1) * frame->pitch[0])] =
+ *s++;
+ dst[(frame->width - j - 3) + ((frame->height - i - 1) * frame->pitch[0])] =
+ *s++;
+ dst[(frame->width - j - 2) + ((frame->height - i - 1) * frame->pitch[0])] =
+ *s++;
+ dst[(frame->width - j - 1) + ((frame->height - i - 1) * frame->pitch[0])] =
+ *s++;
+ }
+ src += srcPitch;
+ }
+ break;
+ case RR_Rotate_270:
+ frame->height <<= 1;
+ for (i = 0; i < frame->height; i += 2) {
+ s = src;
+ for (j = 0; j < frame->width; j++) {
+ /* Copy Y */
+ dst[(frame->height - i - 2) + (j * frame->pitch[0])] = *s++;
+ (void)*s++;
+ }
+ src += srcPitch;
+ }
+ frame->height >>= 1;
+ src = buf + (top * srcPitch) + (left << 1);
+ for (i = 0; i < frame->height; i += 2) {
+ for (j = 0; j < frame->width; j += 2) {
+ /* Copy U */
+ dst[(((frame->height - i) * 2) - 3) + (j * frame->pitch[0])] =
+ src[(j * 2) + 1 + (i * srcPitch)];
+ dst[(((frame->height - i) * 2) - 3) +
+ ((j + 1) * frame->pitch[0])] =
+ src[(j * 2) + 1 + ((i + 1) * srcPitch)];
+ /* Copy V */
+ dst[(((frame->height - i) * 2) - 1) + (j * frame->pitch[0])] =
+ src[(j * 2) + 3 + (i * srcPitch)];
+ dst[(((frame->height - i) * 2) - 1) +
+ ((j + 1) * frame->pitch[0])] =
+ src[(j * 2) + 3 + ((i + 1) * srcPitch)];
+ }
+ }
+ break;
+ }
+}
+
+Bool
+sna_video_copy_data(struct sna *sna,
+ struct sna_video *video,
+ struct sna_video_frame *frame,
+ int top, int left,
+ int npixels, int nlines,
+ unsigned char *buf)
+{
+ unsigned char *dst;
+
+ frame->bo = sna_video_buffer(sna, video, frame);
+ if (frame->bo == NULL)
+ return FALSE;
+
+ /* copy data */
+ dst = kgem_bo_map(&sna->kgem, frame->bo, PROT_READ | PROT_WRITE);
+ if (dst == NULL)
+ return FALSE;
+
+ if (is_planar_fourcc(frame->id)) {
+ int srcPitch = ALIGN(frame->width, 0x4);
+ int srcPitch2 = ALIGN((frame->width >> 1), 0x4);
+
+ sna_copy_planar_data(sna, video, frame,
+ buf, dst,
+ srcPitch, srcPitch2,
+ nlines, top, left);
+ } else {
+ int srcPitch = frame->width << 1;
+
+ sna_copy_packed_data(sna, video, frame,
+ buf, dst,
+ srcPitch,
+ top, left);
+ }
+
+ munmap(dst, video->buf->size);
+ return TRUE;
+}
+
+static void sna_crtc_box(xf86CrtcPtr crtc, BoxPtr crtc_box)
+{
+ if (crtc->enabled) {
+ crtc_box->x1 = crtc->x;
+ crtc_box->x2 =
+ crtc->x + xf86ModeWidth(&crtc->mode, crtc->rotation);
+ crtc_box->y1 = crtc->y;
+ crtc_box->y2 =
+ crtc->y + xf86ModeHeight(&crtc->mode, crtc->rotation);
+ } else
+ crtc_box->x1 = crtc_box->x2 = crtc_box->y1 = crtc_box->y2 = 0;
+}
+
+static void sna_box_intersect(BoxPtr dest, BoxPtr a, BoxPtr b)
+{
+ dest->x1 = a->x1 > b->x1 ? a->x1 : b->x1;
+ dest->x2 = a->x2 < b->x2 ? a->x2 : b->x2;
+ dest->y1 = a->y1 > b->y1 ? a->y1 : b->y1;
+ dest->y2 = a->y2 < b->y2 ? a->y2 : b->y2;
+ if (dest->x1 >= dest->x2 || dest->y1 >= dest->y2)
+ dest->x1 = dest->x2 = dest->y1 = dest->y2 = 0;
+}
+
+static int sna_box_area(BoxPtr box)
+{
+ return (int)(box->x2 - box->x1) * (int)(box->y2 - box->y1);
+}
+
+/*
+ * Return the crtc covering 'box'. If two crtcs cover a portion of
+ * 'box', then prefer 'desired'. If 'desired' is NULL, then prefer the crtc
+ * with greater coverage
+ */
+
+xf86CrtcPtr
+sna_covering_crtc(ScrnInfoPtr scrn,
+ BoxPtr box, xf86CrtcPtr desired, BoxPtr crtc_box_ret)
+{
+ xf86CrtcConfigPtr xf86_config = XF86_CRTC_CONFIG_PTR(scrn);
+ xf86CrtcPtr crtc, best_crtc;
+ int coverage, best_coverage;
+ int c;
+ BoxRec crtc_box, cover_box;
+
+ DBG(("%s for box=(%d, %d), (%d, %d)\n",
+ __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+
+ best_crtc = NULL;
+ best_coverage = 0;
+ crtc_box_ret->x1 = 0;
+ crtc_box_ret->x2 = 0;
+ crtc_box_ret->y1 = 0;
+ crtc_box_ret->y2 = 0;
+ for (c = 0; c < xf86_config->num_crtc; c++) {
+ crtc = xf86_config->crtc[c];
+
+ /* If the CRTC is off, treat it as not covering */
+ if (!sna_crtc_on(crtc)) {
+ DBG(("%s: crtc %d off, skipping\n", __FUNCTION__, c));
+ continue;
+ }
+
+ sna_crtc_box(crtc, &crtc_box);
+ sna_box_intersect(&cover_box, &crtc_box, box);
+ coverage = sna_box_area(&cover_box);
+ if (coverage && crtc == desired) {
+ DBG(("%s: box is on desired crtc [%p]\n",
+ __FUNCTION__, crtc));
+ *crtc_box_ret = crtc_box;
+ return crtc;
+ }
+ if (coverage > best_coverage) {
+ *crtc_box_ret = crtc_box;
+ best_crtc = crtc;
+ best_coverage = coverage;
+ }
+ }
+ DBG(("%s: best crtc = %p\n", __FUNCTION__, best_crtc));
+ return best_crtc;
+}
+
+bool
+sna_wait_for_scanline(struct sna *sna, PixmapPtr pixmap,
+ xf86CrtcPtr crtc, RegionPtr clip)
+{
+ pixman_box16_t box, crtc_box;
+ int pipe, event;
+ Bool full_height;
+ int y1, y2;
+ uint32_t *b;
+
+ /* XXX no wait for scanline support on SNB? */
+ if (sna->kgem.gen >= 60)
+ return false;
+
+ if (!pixmap_is_scanout(pixmap))
+ return false;
+
+ if (crtc == NULL) {
+ if (clip) {
+ crtc_box = *REGION_EXTENTS(NULL, clip);
+ } else {
+ crtc_box.x1 = 0; /* XXX drawable offsets? */
+ crtc_box.y1 = 0;
+ crtc_box.x2 = pixmap->drawable.width;
+ crtc_box.y2 = pixmap->drawable.height;
+ }
+ crtc = sna_covering_crtc(sna->scrn, &crtc_box, NULL, &crtc_box);
+ }
+
+ if (crtc == NULL)
+ return false;
+
+ if (clip) {
+ box = *REGION_EXTENTS(unused, clip);
+
+ if (crtc->transform_in_use)
+ pixman_f_transform_bounds(&crtc->f_framebuffer_to_crtc, &box);
+
+ /* We could presume the clip was correctly computed... */
+ sna_crtc_box(crtc, &crtc_box);
+ sna_box_intersect(&box, &crtc_box, &box);
+
+ /*
+ * Make sure we don't wait for a scanline that will
+ * never occur
+ */
+ y1 = (crtc_box.y1 <= box.y1) ? box.y1 - crtc_box.y1 : 0;
+ y2 = (box.y2 <= crtc_box.y2) ?
+ box.y2 - crtc_box.y1 : crtc_box.y2 - crtc_box.y1;
+ if (y2 <= y1)
+ return false;
+
+ full_height = FALSE;
+ if (y1 == 0 && y2 == (crtc_box.y2 - crtc_box.y1))
+ full_height = TRUE;
+ } else {
+ sna_crtc_box(crtc, &crtc_box);
+ y1 = crtc_box.y1;
+ y2 = crtc_box.y2;
+ full_height = TRUE;
+ }
+
+ /*
+ * Pre-965 doesn't have SVBLANK, so we need a bit
+ * of extra time for the blitter to start up and
+ * do its job for a full height blit
+ */
+ if (sna_crtc_to_pipe(crtc) == 0) {
+ pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEA;
+ event = MI_WAIT_FOR_PIPEA_SCAN_LINE_WINDOW;
+ if (full_height)
+ event = MI_WAIT_FOR_PIPEA_SVBLANK;
+ } else {
+ pipe = MI_LOAD_SCAN_LINES_DISPLAY_PIPEB;
+ event = MI_WAIT_FOR_PIPEB_SCAN_LINE_WINDOW;
+ if (full_height)
+ event = MI_WAIT_FOR_PIPEB_SVBLANK;
+ }
+
+ if (crtc->mode.Flags & V_INTERLACE) {
+ /* DSL count field lines */
+ y1 /= 2;
+ y2 /= 2;
+ }
+
+ b = kgem_get_batch(&sna->kgem, 5);
+ /* The documentation says that the LOAD_SCAN_LINES command
+ * always comes in pairs. Don't ask me why. */
+ b[0] = MI_LOAD_SCAN_LINES_INCL | pipe;
+ b[1] = (y1 << 16) | (y2-1);
+ b[2] = MI_LOAD_SCAN_LINES_INCL | pipe;
+ b[3] = (y1 << 16) | (y2-1);
+ b[4] = MI_WAIT_FOR_EVENT | event;
+ kgem_advance_batch(&sna->kgem, 5);
+ return true;
+}
+
+void sna_video_init(struct sna *sna, ScreenPtr screen)
+{
+ XF86VideoAdaptorPtr *adaptors, *newAdaptors;
+ XF86VideoAdaptorPtr textured, overlay;
+ int num_adaptors;
+ int prefer_overlay =
+ xf86ReturnOptValBool(sna->Options, OPTION_PREFER_OVERLAY, FALSE);
+
+ num_adaptors = xf86XVListGenericAdaptors(sna->scrn, &adaptors);
+ newAdaptors =
+ malloc((num_adaptors + 2) * sizeof(XF86VideoAdaptorPtr *));
+ if (newAdaptors == NULL)
+ return;
+
+ memcpy(newAdaptors, adaptors,
+ num_adaptors * sizeof(XF86VideoAdaptorPtr));
+ adaptors = newAdaptors;
+
+ /* Set up textured video if we can do it at this depth and we are on
+ * supported hardware.
+ */
+ textured = sna_video_textured_setup(sna, screen);
+ overlay = sna_video_overlay_setup(sna, screen);
+
+ if (overlay && prefer_overlay)
+ adaptors[num_adaptors++] = overlay;
+
+ if (textured)
+ adaptors[num_adaptors++] = textured;
+
+ if (overlay && !prefer_overlay)
+ adaptors[num_adaptors++] = overlay;
+
+ if (num_adaptors)
+ xf86XVScreenInit(screen, adaptors, num_adaptors);
+ else
+ xf86DrvMsg(sna->scrn->scrnIndex, X_WARNING,
+ "Disabling Xv because no adaptors could be initialized.\n");
+ if (textured)
+ sna_video_xvmc_setup(sna, screen, textured);
+
+ free(adaptors);
+}