summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Chris Wilson <chris@chris-wilson.co.uk> 2011-10-18 15:51:33 +0100
committer Chris Wilson <chris@chris-wilson.co.uk> 2011-10-19 13:11:24 +0100
commit 7a9c76e1812d106fedf160c959e6e502998e4ce8 (patch)
tree e369bfd1731decffd7b28e7698fdddcb792fc4f5
parent 0b83abfb2bc3f65447205048ae0af1a94fa9ef15 (diff)
sna: Micro-optimise fill-spans
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- src/sna/Makefile.am 1
-rw-r--r-- src/sna/compiler.h 43
-rw-r--r-- src/sna/kgem.h 10
-rw-r--r-- src/sna/sna.h 12
-rw-r--r-- src/sna/sna_accel.c 70
-rw-r--r-- src/sna/sna_blt.c 75
-rw-r--r-- src/sna/sna_render.h 4
-rw-r--r-- src/sna/sna_trapezoids.c 2
8 files changed, 136 insertions, 81 deletions
diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am
index bacb98fa..2a108012 100644
--- a/src/sna/Makefile.am
+++ b/src/sna/Makefile.am
@@ -32,6 +32,7 @@ NULL:=#
libsna_la_SOURCES = \
blt.c \
+ compiler.h \
kgem.c \
kgem.h \
sna.h \
diff --git a/src/sna/compiler.h b/src/sna/compiler.h
new file mode 100644
index 00000000..0b113102
--- /dev/null
+++ b/src/sna/compiler.h
@@ -0,0 +1,43 @@
+/*
+ * Copyright (c) 2011 Intel Corporation
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the "Software"),
+ * to deal in the Software without restriction, including without limitation
+ * the rights to use, copy, modify, merge, publish, distribute, sublicense,
+ * and/or sell copies of the Software, and to permit persons to whom the
+ * Software is furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the next
+ * paragraph) shall be included in all copies or substantial portions of the
+ * Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
+ * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ * Authors:
+ * Chris Wilson <chris@chris-wilson.co.uk>
+ *
+ */
+
+#ifndef _SNA_COMPILER_H_
+#define _SNA_COMPILER_H_
+
+/*
+ * Compiler hint macros shared by the SNA sources.
+ *
+ * When building with GCC (major version > 2) and __OPTIMIZE__ is defined,
+ * the macros expand to the matching builtin or function attribute.
+ * Otherwise they degrade to no-ops so annotated code compiles unchanged.
+ */
+#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__)
+/* Branch hints: expr is normalised to 0/1 with !! before being passed on. */
+#define likely(expr) (__builtin_expect (!!(expr), 1))
+#define unlikely(expr) (__builtin_expect (!!(expr), 0))
+/* Ask the compiler not to inline the annotated function. */
+#define noinline __attribute__((noinline))
+/* GCC regparm(3) calling convention -- NOTE(review): see the GCC docs for which targets honour it. */
+#define fastcall __attribute__((regparm(3)))
+#else
+/* Fallbacks: the condition is evaluated as written, the attributes vanish. */
+#define likely(expr) (expr)
+#define unlikely(expr) (expr)
+#define noinline
+#define fastcall
+#endif
+
+#endif /* _SNA_COMPILER_H_ */
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index a43a7129..43e126b2 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -25,14 +25,16 @@
*
*/
+#ifndef KGEM_H
+#define KGEM_H
+
#include <stdint.h>
#include <stdbool.h>
#include <stdarg.h>
#include <i915_drm.h>
-#ifndef KGEM_H
-#define KGEM_H
+#include "compiler.h"
#if DEBUG_KGEM
#define DBG_HDR(x) ErrorF x
@@ -249,12 +251,12 @@ static inline void _kgem_set_mode(struct kgem *kgem, enum kgem_mode mode)
static inline bool kgem_check_batch(struct kgem *kgem, int num_dwords)
{
- return kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED <= kgem->surface;
+ /*
+ * True when num_dwords more dwords, plus KGEM_BATCH_RESERVED, still fit
+ * at or below kgem->surface. Marked likely() so the compiler treats
+ * "there is room" as the expected outcome.
+ */
+ return likely(kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED <= kgem->surface);
}
static inline bool kgem_check_reloc(struct kgem *kgem, int num_reloc)
{
- return kgem->nreloc + num_reloc <= KGEM_RELOC_SIZE(kgem);
+ /*
+ * True when num_reloc more relocation entries fit within
+ * KGEM_RELOC_SIZE(kgem). Marked likely() so the compiler treats
+ * "there is room" as the expected outcome.
+ */
+ return likely(kgem->nreloc + num_reloc <= KGEM_RELOC_SIZE(kgem));
}
static inline bool kgem_check_batch_with_surfaces(struct kgem *kgem,
diff --git a/src/sna/sna.h b/src/sna/sna.h
index ae043ed2..e67e2271 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -34,15 +34,15 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
*
*/
+#ifndef _SNA_H_
+#define _SNA_H_
+
#ifdef HAVE_CONFIG_H
#include "config.h"
#endif
#include <stdint.h>
-#ifndef _SNA_H_
-#define _SNA_H_
-
#include "xf86_OSproc.h"
#include "compiler.h"
#include "xf86PciInfo.h"
@@ -67,6 +67,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
#include <libudev.h>
#endif
+#include "compiler.h"
+
#define DBG(x)
#define DEBUG_ALL (HAS_DEBUG_FULL || 0)
@@ -474,6 +476,10 @@ Bool sna_transform_is_integer_translation(const PictTransform *t,
Bool sna_transform_is_translation(const PictTransform *t,
pixman_fixed_t *tx, pixman_fixed_t *ty);
+/*
+ * Report the kgem.wedged flag of @sna, hinting to the compiler via
+ * unlikely() that the flag is normally clear. Callers such as
+ * sna_fill_spans() take their fallback path when this returns true.
+ */
+static inline bool wedged(struct sna *sna)
+{
+ return unlikely(sna->kgem.wedged);
+}
static inline uint32_t pixmap_size(PixmapPtr pixmap)
{
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index 4f5c1525..6101fb69 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -1772,7 +1772,6 @@ sna_fill_spans_blt(DrawablePtr drawable,
struct sna *sna = to_sna_from_drawable(drawable);
PixmapPtr pixmap = get_drawable_pixmap(drawable);
RegionRec clip;
- int need_translation = !gc->miTranslate;
int16_t dx, dy;
struct sna_fill_op fill;
@@ -1788,17 +1787,21 @@ sna_fill_spans_blt(DrawablePtr drawable,
extents->x1, extents->y1, extents->x2, extents->y2,
n, pt->x, pt->y));
- get_drawable_deltas(drawable, pixmap, &dx, &dy);
- while (n--) {
- int X1 = pt->x;
- int y = pt->y;
- int X2 = X1 + (int)*width;
+ if (!gc->miTranslate) {
+ int i;
- if (need_translation) {
- X1 += drawable->x;
- X2 += drawable->x;
- y += drawable->y;
+ for (i = 0; i < n; i++) {
+ /*
+ * Translate every span into drawable space. Index by i:
+ * writing through pt-> alone would offset the first span
+ * n times and leave the remaining spans untranslated.
+ */
+ /* XXX overflow? */
+ pt[i].x += drawable->x;
+ pt[i].y += drawable->y;
}
+ }
+
+ get_drawable_deltas(drawable, pixmap, &dx, &dy);
+ do {
+ int16_t X1 = pt->x;
+ int16_t y = pt->y;
+ int16_t X2 = X1 + (int)*width;
pt++;
width++;
@@ -1817,22 +1820,17 @@ sna_fill_spans_blt(DrawablePtr drawable,
y += dy;
if (clip.data == NULL) {
- X1 += dx;
- X2 += dx;
- assert(X1 >= 0 && X2 <= pixmap->drawable.width);
- if (X2 > X1) {
- fill.blt(sna, &fill, X1, y, X2-X1, 1);
- if (damage) {
- BoxRec box;
+ fill.blt(sna, &fill, X1 + dx, y, X2-X1, 1);
+ if (damage) {
+ BoxRec box;
- box.x1 = X1;
- box.x2 = X2;
- box.y1 = y;
- box.y2 = box.y1 + 1;
+ box.x1 = X1 + dx;
+ box.x2 = X2 + dx;
+ box.y1 = y;
+ box.y2 = box.y1 + 1;
- assert_pixmap_contains_box(pixmap, &box);
- sna_damage_add_box(damage, &box);
- }
+ assert_pixmap_contains_box(pixmap, &box);
+ sna_damage_add_box(damage, &box);
}
} else {
int nc = clip.data->numRects;
@@ -1872,7 +1870,7 @@ sna_fill_spans_blt(DrawablePtr drawable,
b++;
}
}
- }
+ } while (--n);
fill.done(sna, &fill);
RegionUninit(&clip);
return TRUE;
@@ -1960,22 +1958,22 @@ sna_fill_spans(DrawablePtr drawable, GCPtr gc, int n,
DDXPointPtr pt, int *width, int sorted)
{
struct sna *sna = to_sna_from_drawable(drawable);
- BoxRec extents;
RegionRec region;
DBG(("%s(n=%d, pt[0]=(%d, %d)\n",
__FUNCTION__, n, pt[0].x, pt[0].y));
- if (sna_spans_extents(drawable, gc, n, pt, width, &extents))
+ if (sna_spans_extents(drawable, gc, n, pt, width, &region.extents))
return;
DBG(("%s: extents (%d, %d), (%d, %d)\n", __FUNCTION__,
- extents.x1, extents.y1, extents.x2, extents.y2));
+ region.extents.x1, region.extents.y1,
+ region.extents.x2, region.extents.y2));
if (FORCE_FALLBACK)
goto fallback;
- if (sna->kgem.wedged) {
+ if (wedged(sna)) {
DBG(("%s: fallback -- wedged\n", __FUNCTION__));
goto fallback;
}
@@ -1992,20 +1990,20 @@ sna_fill_spans(DrawablePtr drawable, GCPtr gc, int n,
DBG(("%s: trying solid fill [alu=%d, pixel=%08lx] blt paths\n",
__FUNCTION__, gc->alu, gc->fgPixel));
- if (sna_drawable_use_gpu_bo(drawable, &extents) &&
+ if (sna_drawable_use_gpu_bo(drawable, &region.extents) &&
sna_fill_spans_blt(drawable,
priv->gpu_bo,
- priv->gpu_only ? NULL : reduce_damage(drawable, &priv->gpu_damage, &extents),
+ priv->gpu_only ? NULL : reduce_damage(drawable, &priv->gpu_damage, &region.extents),
gc, n, pt, width, sorted,
- &extents))
+ &region.extents))
return;
- if (sna_drawable_use_cpu_bo(drawable, &extents) &&
+ if (sna_drawable_use_cpu_bo(drawable, &region.extents) &&
sna_fill_spans_blt(drawable,
priv->cpu_bo,
- reduce_damage(drawable, &priv->cpu_damage, &extents),
+ reduce_damage(drawable, &priv->cpu_damage, &region.extents),
gc, n, pt, width, sorted,
- &extents))
+ &region.extents))
return;
} else if (gc->fillStyle == FillTiled) {
xRectangle *rect;
@@ -2032,7 +2030,7 @@ sna_fill_spans(DrawablePtr drawable, GCPtr gc, int n,
fallback:
DBG(("%s: fallback\n", __FUNCTION__));
- region_set(&region, &extents);
+ region.data = NULL;
region_maybe_clip(&region, gc->pCompositeClip);
if (!RegionNotEmpty(&region))
return;
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 1597b1fb..01487857 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -141,7 +141,7 @@ static bool sna_blt_fill_init(struct sna *sna,
kgem_set_mode(kgem, KGEM_BLT);
if (!kgem_check_bo_fenced(kgem, bo, NULL) ||
- !kgem_check_batch(kgem, 9)) {
+ !kgem_check_batch(kgem, 12)) {
_kgem_submit(kgem);
_kgem_set_mode(kgem, KGEM_BLT);
}
@@ -151,7 +151,7 @@ static bool sna_blt_fill_init(struct sna *sna,
{
uint32_t *b;
- if (kgem->nreloc + 1 > KGEM_RELOC_SIZE(kgem)) {
+ if (!kgem_check_reloc(kgem, 1)) {
_kgem_submit(kgem);
_kgem_set_mode(kgem, KGEM_BLT);
}
@@ -181,10 +181,38 @@ static bool sna_blt_fill_init(struct sna *sna,
return TRUE;
}
+/*
+ * Start a fresh batch for an ongoing solid fill.
+ *
+ * Submits whatever is currently queued, switches the new batch to BLT mode
+ * and re-emits the 9-dword XY_SETUP_MONO_PATTERN_SL_BLT setup described by
+ * @blt (br13, a relocation to blt->bo[0], and the fill pixel), so that the
+ * 3-dword fill commands emitted by sna_blt_fill_one() can follow it.
+ *
+ * Called by sna_blt_fill_one() only when kgem_check_batch() reports that
+ * the current batch has no room; declared noinline, presumably to keep
+ * this rarely-taken path out of the caller's body.
+ */
+noinline static void sna_blt_fill_begin(struct sna *sna,
+ const struct sna_blt_state *blt)
+{
+ struct kgem *kgem = &sna->kgem;
+ uint32_t *b;
+
+ /* Flush the full batch and begin a new one in BLT mode. */
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+
+ b = kgem->batch + kgem->nbatch;
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT;
+ if (blt->bpp == 32)
+ b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ b[1] = blt->br13;
+ b[2] = 0;
+ b[3] = 0;
+ /* Dword 4 holds the relocated address of the destination bo. */
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ /* The same pixel value is written to both dwords 5 and 6. */
+ b[5] = blt->pixel;
+ b[6] = blt->pixel;
+ b[7] = 0;
+ b[8] = 0;
+ kgem->nbatch += 9;
+}
+
static void sna_blt_fill_one(struct sna *sna,
const struct sna_blt_state *blt,
- int x, int y,
- int width, int height)
+ int16_t x, int16_t y,
+ int16_t width, int16_t height)
{
struct kgem *kgem = &sna->kgem;
uint32_t *b;
@@ -196,33 +224,13 @@ static void sna_blt_fill_one(struct sna *sna,
assert(y >= 0);
assert((y+height) * blt->bo[0]->pitch <= blt->bo[0]->size);
- if (!kgem_check_batch(kgem, 3)) {
- _kgem_submit(kgem);
- _kgem_set_mode(kgem, KGEM_BLT);
-
- b = kgem->batch + kgem->nbatch;
- b[0] = XY_SETUP_MONO_PATTERN_SL_BLT;
- if (blt->bpp == 32)
- b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
- b[1] = blt->br13;
- b[2] = 0;
- b[3] = 0;
- b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0],
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = blt->pixel;
- b[6] = blt->pixel;
- b[7] = 0;
- b[8] = 0;
- kgem->nbatch += 9;
- }
+ if (!kgem_check_batch(kgem, 3))
+ sna_blt_fill_begin(sna, blt);
b = kgem->batch + kgem->nbatch;
b[0] = blt->cmd;
- b[1] = (y << 16) | x;
- b[2] = ((y + height) << 16) | (x + width);
+ b[1] = y << 16 | x;
+ b[2] = b[1] + (height << 16 | width);
kgem->nbatch += 3;
}
@@ -318,8 +326,7 @@ static void sna_blt_copy_one(struct sna *sna,
return;
}
- if (kgem->nbatch + 8 + KGEM_BATCH_RESERVED > kgem->surface ||
- kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem))
+ if (!kgem_check_batch(kgem, 8) || !kgem_check_reloc(kgem, 2))
_kgem_submit(kgem);
b = kgem->batch + kgem->nbatch;
@@ -1377,8 +1384,8 @@ Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
kgem_set_mode(kgem, KGEM_BLT);
if (!kgem_check_batch(kgem, 6) ||
- !kgem_check_bo_fenced(kgem, bo, NULL) ||
- kgem->nreloc + 1 > KGEM_RELOC_SIZE(kgem))
+ !kgem_check_reloc(kgem, 1) ||
+ !kgem_check_bo_fenced(kgem, bo, NULL))
_kgem_submit(kgem);
do {
@@ -1478,8 +1485,8 @@ Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
kgem_set_mode(kgem, KGEM_BLT);
if (!kgem_check_batch(kgem, 8) ||
- !kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL) ||
- kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem))
+ !kgem_check_reloc(kgem, 2) ||
+ !kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL))
_kgem_submit(kgem);
do {
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h
index b4b40857..0f174f67 100644
--- a/src/sna/sna_render.h
+++ b/src/sna/sna_render.h
@@ -1,9 +1,9 @@
#ifndef SNA_RENDER_H
#define SNA_RENDER_H
-#define GRADIENT_CACHE_SIZE 16
+#include "compiler.h"
-#define fastcall __attribute__((regparm(3)))
+#define GRADIENT_CACHE_SIZE 16
struct sna;
struct sna_glyph;
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c
index 1549ef50..9801121e 100644
--- a/src/sna/sna_trapezoids.c
+++ b/src/sna/sna_trapezoids.c
@@ -52,8 +52,6 @@
/* TODO: Emit unantialiased and MSAA triangles. */
-#define unlikely(x) x
-
#ifndef MAX
#define MAX(x,y) ((x) >= (y) ? (x) : (y))
#endif