diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2011-10-18 15:51:33 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2011-10-19 13:11:24 +0100 |
commit | 7a9c76e1812d106fedf160c959e6e502998e4ce8 (patch) | |
tree | e369bfd1731decffd7b28e7698fdddcb792fc4f5 | |
parent | 0b83abfb2bc3f65447205048ae0af1a94fa9ef15 (diff) |
sna: Micro-optimise fill-spans
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
-rw-r--r-- | src/sna/Makefile.am | 1 | ||||
-rw-r--r-- | src/sna/compiler.h | 43 | ||||
-rw-r--r-- | src/sna/kgem.h | 10 | ||||
-rw-r--r-- | src/sna/sna.h | 12 | ||||
-rw-r--r-- | src/sna/sna_accel.c | 70 | ||||
-rw-r--r-- | src/sna/sna_blt.c | 75 | ||||
-rw-r--r-- | src/sna/sna_render.h | 4 | ||||
-rw-r--r-- | src/sna/sna_trapezoids.c | 2 |
8 files changed, 136 insertions, 81 deletions
diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am index bacb98fa..2a108012 100644 --- a/src/sna/Makefile.am +++ b/src/sna/Makefile.am @@ -32,6 +32,7 @@ NULL:=# libsna_la_SOURCES = \ blt.c \ + compiler.h \ kgem.c \ kgem.h \ sna.h \ diff --git a/src/sna/compiler.h b/src/sna/compiler.h new file mode 100644 index 00000000..0b113102 --- /dev/null +++ b/src/sna/compiler.h @@ -0,0 +1,43 @@ +/* + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifndef _SNA_COMPILER_H_ +#define _SNA_COMPILER_H_ + +#if defined(__GNUC__) && (__GNUC__ > 2) && defined(__OPTIMIZE__) +#define likely(expr) (__builtin_expect (!!(expr), 1)) +#define unlikely(expr) (__builtin_expect (!!(expr), 0)) +#define noinline __attribute__((noinline)) +#define fastcall __attribute__((regparm(3))) +#else +#define likely(expr) (expr) +#define unlikely(expr) (expr) +#define noinline +#define fastcall +#endif + +#endif /* _SNA_COMPILER_H_ */ diff --git a/src/sna/kgem.h b/src/sna/kgem.h index a43a7129..43e126b2 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -25,14 +25,16 @@ * */ +#ifndef KGEM_H +#define KGEM_H + #include <stdint.h> #include <stdbool.h> #include <stdarg.h> #include <i915_drm.h> -#ifndef KGEM_H -#define KGEM_H +#include "compiler.h" #if DEBUG_KGEM #define DBG_HDR(x) ErrorF x @@ -249,12 +251,12 @@ static inline void _kgem_set_mode(struct kgem *kgem, enum kgem_mode mode) static inline bool kgem_check_batch(struct kgem *kgem, int num_dwords) { - return kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED <= kgem->surface; + return likely(kgem->nbatch + num_dwords + KGEM_BATCH_RESERVED <= kgem->surface); } static inline bool kgem_check_reloc(struct kgem *kgem, int num_reloc) { - return kgem->nreloc + num_reloc <= KGEM_RELOC_SIZE(kgem); + return likely(kgem->nreloc + num_reloc <= KGEM_RELOC_SIZE(kgem)); } static inline bool kgem_check_batch_with_surfaces(struct kgem *kgem, diff --git a/src/sna/sna.h b/src/sna/sna.h index ae043ed2..e67e2271 100644 --- a/src/sna/sna.h +++ b/src/sna/sna.h @@ -34,15 +34,15 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. * */ +#ifndef _SNA_H_ +#define _SNA_H_ + #ifdef HAVE_CONFIG_H #include "config.h" #endif #include <stdint.h> -#ifndef _SNA_H_ -#define _SNA_H_ - #include "xf86_OSproc.h" #include "compiler.h" #include "xf86PciInfo.h" @@ -67,6 +67,8 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 
#include <libudev.h> #endif +#include "compiler.h" + #define DBG(x) #define DEBUG_ALL (HAS_DEBUG_FULL || 0) @@ -474,6 +476,10 @@ Bool sna_transform_is_integer_translation(const PictTransform *t, Bool sna_transform_is_translation(const PictTransform *t, pixman_fixed_t *tx, pixman_fixed_t *ty); +static inline bool wedged(struct sna *sna) +{ + return unlikely(sna->kgem.wedged); +} static inline uint32_t pixmap_size(PixmapPtr pixmap) { diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index 4f5c1525..6101fb69 100644 --- a/src/sna/sna_accel.c +++ b/src/sna/sna_accel.c @@ -1772,7 +1772,6 @@ sna_fill_spans_blt(DrawablePtr drawable, struct sna *sna = to_sna_from_drawable(drawable); PixmapPtr pixmap = get_drawable_pixmap(drawable); RegionRec clip; - int need_translation = !gc->miTranslate; int16_t dx, dy; struct sna_fill_op fill; @@ -1788,17 +1787,21 @@ sna_fill_spans_blt(DrawablePtr drawable, extents->x1, extents->y1, extents->x2, extents->y2, n, pt->x, pt->y)); - get_drawable_deltas(drawable, pixmap, &dx, &dy); - while (n--) { - int X1 = pt->x; - int y = pt->y; - int X2 = X1 + (int)*width; + if (!gc->miTranslate) { + int i; - if (need_translation) { - X1 += drawable->x; - X2 += drawable->x; - y += drawable->y; + for (i = 0; i < n; i++) { + /* XXX overflow? 
*/ + pt->x += drawable->x; + pt->y += drawable->y; } + } + + get_drawable_deltas(drawable, pixmap, &dx, &dy); + do { + int16_t X1 = pt->x; + int16_t y = pt->y; + int16_t X2 = X1 + (int)*width; pt++; width++; @@ -1817,22 +1820,17 @@ sna_fill_spans_blt(DrawablePtr drawable, y += dy; if (clip.data == NULL) { - X1 += dx; - X2 += dx; - assert(X1 >= 0 && X2 <= pixmap->drawable.width); - if (X2 > X1) { - fill.blt(sna, &fill, X1, y, X2-X1, 1); - if (damage) { - BoxRec box; + fill.blt(sna, &fill, X1 + dx, y, X2-X1, 1); + if (damage) { + BoxRec box; - box.x1 = X1; - box.x2 = X2; - box.y1 = y; - box.y2 = box.y1 + 1; + box.x1 = X1 + dx; + box.x2 = X2 + dx; + box.y1 = y; + box.y2 = box.y1 + 1; - assert_pixmap_contains_box(pixmap, &box); - sna_damage_add_box(damage, &box); - } + assert_pixmap_contains_box(pixmap, &box); + sna_damage_add_box(damage, &box); } } else { int nc = clip.data->numRects; @@ -1872,7 +1870,7 @@ sna_fill_spans_blt(DrawablePtr drawable, b++; } } - } + } while (--n); fill.done(sna, &fill); RegionUninit(&clip); return TRUE; @@ -1960,22 +1958,22 @@ sna_fill_spans(DrawablePtr drawable, GCPtr gc, int n, DDXPointPtr pt, int *width, int sorted) { struct sna *sna = to_sna_from_drawable(drawable); - BoxRec extents; RegionRec region; DBG(("%s(n=%d, pt[0]=(%d, %d)\n", __FUNCTION__, n, pt[0].x, pt[0].y)); - if (sna_spans_extents(drawable, gc, n, pt, width, &extents)) + if (sna_spans_extents(drawable, gc, n, pt, width, &region.extents)) return; DBG(("%s: extents (%d, %d), (%d, %d)\n", __FUNCTION__, - extents.x1, extents.y1, extents.x2, extents.y2)); + region.extents.x1, region.extents.y1, + region.extents.x2, region.extents.y2)); if (FORCE_FALLBACK) goto fallback; - if (sna->kgem.wedged) { + if (wedged(sna)) { DBG(("%s: fallback -- wedged\n", __FUNCTION__)); goto fallback; } @@ -1992,20 +1990,20 @@ sna_fill_spans(DrawablePtr drawable, GCPtr gc, int n, DBG(("%s: trying solid fill [alu=%d, pixel=%08lx] blt paths\n", __FUNCTION__, gc->alu, gc->fgPixel)); - if 
(sna_drawable_use_gpu_bo(drawable, &extents) && + if (sna_drawable_use_gpu_bo(drawable, &region.extents) && sna_fill_spans_blt(drawable, priv->gpu_bo, - priv->gpu_only ? NULL : reduce_damage(drawable, &priv->gpu_damage, &extents), + priv->gpu_only ? NULL : reduce_damage(drawable, &priv->gpu_damage, &region.extents), gc, n, pt, width, sorted, - &extents)) + &region.extents)) return; - if (sna_drawable_use_cpu_bo(drawable, &extents) && + if (sna_drawable_use_cpu_bo(drawable, &region.extents) && sna_fill_spans_blt(drawable, priv->cpu_bo, - reduce_damage(drawable, &priv->cpu_damage, &extents), + reduce_damage(drawable, &priv->cpu_damage, &region.extents), gc, n, pt, width, sorted, - &extents)) + &region.extents)) return; } else if (gc->fillStyle == FillTiled) { xRectangle *rect; @@ -2032,7 +2030,7 @@ sna_fill_spans(DrawablePtr drawable, GCPtr gc, int n, fallback: DBG(("%s: fallback\n", __FUNCTION__)); - region_set(&region, &extents); + region.data = NULL; region_maybe_clip(&region, gc->pCompositeClip); if (!RegionNotEmpty(&region)) return; diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c index 1597b1fb..01487857 100644 --- a/src/sna/sna_blt.c +++ b/src/sna/sna_blt.c @@ -141,7 +141,7 @@ static bool sna_blt_fill_init(struct sna *sna, kgem_set_mode(kgem, KGEM_BLT); if (!kgem_check_bo_fenced(kgem, bo, NULL) || - !kgem_check_batch(kgem, 9)) { + !kgem_check_batch(kgem, 12)) { _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); } @@ -151,7 +151,7 @@ static bool sna_blt_fill_init(struct sna *sna, { uint32_t *b; - if (kgem->nreloc + 1 > KGEM_RELOC_SIZE(kgem)) { + if (!kgem_check_reloc(kgem, 1)) { _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); } @@ -181,10 +181,38 @@ static bool sna_blt_fill_init(struct sna *sna, return TRUE; } +noinline static void sna_blt_fill_begin(struct sna *sna, + const struct sna_blt_state *blt) +{ + struct kgem *kgem = &sna->kgem; + uint32_t *b; + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + + b = kgem->batch + kgem->nbatch; + b[0] = 
XY_SETUP_MONO_PATTERN_SL_BLT; + if (blt->bpp == 32) + b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + b[1] = blt->br13; + b[2] = 0; + b[3] = 0; + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0], + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = blt->pixel; + b[6] = blt->pixel; + b[7] = 0; + b[8] = 0; + kgem->nbatch += 9; +} + static void sna_blt_fill_one(struct sna *sna, const struct sna_blt_state *blt, - int x, int y, - int width, int height) + int16_t x, int16_t y, + int16_t width, int16_t height) { struct kgem *kgem = &sna->kgem; uint32_t *b; @@ -196,33 +224,13 @@ static void sna_blt_fill_one(struct sna *sna, assert(y >= 0); assert((y+height) * blt->bo[0]->pitch <= blt->bo[0]->size); - if (!kgem_check_batch(kgem, 3)) { - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); - - b = kgem->batch + kgem->nbatch; - b[0] = XY_SETUP_MONO_PATTERN_SL_BLT; - if (blt->bpp == 32) - b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; - b[1] = blt->br13; - b[2] = 0; - b[3] = 0; - b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0], - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = blt->pixel; - b[6] = blt->pixel; - b[7] = 0; - b[8] = 0; - kgem->nbatch += 9; - } + if (!kgem_check_batch(kgem, 3)) + sna_blt_fill_begin(sna, blt); b = kgem->batch + kgem->nbatch; b[0] = blt->cmd; - b[1] = (y << 16) | x; - b[2] = ((y + height) << 16) | (x + width); + b[1] = y << 16 | x; + b[2] = b[1] + (height << 16 | width); kgem->nbatch += 3; } @@ -318,8 +326,7 @@ static void sna_blt_copy_one(struct sna *sna, return; } - if (kgem->nbatch + 8 + KGEM_BATCH_RESERVED > kgem->surface || - kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem)) + if (!kgem_check_batch(kgem, 8) || !kgem_check_reloc(kgem, 2)) _kgem_submit(kgem); b = kgem->batch + kgem->nbatch; @@ -1377,8 +1384,8 @@ Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, kgem_set_mode(kgem, KGEM_BLT); if (!kgem_check_batch(kgem, 6) || - !kgem_check_bo_fenced(kgem, 
bo, NULL) || - kgem->nreloc + 1 > KGEM_RELOC_SIZE(kgem)) + !kgem_check_reloc(kgem, 1) || + !kgem_check_bo_fenced(kgem, bo, NULL)) _kgem_submit(kgem); do { @@ -1478,8 +1485,8 @@ Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, kgem_set_mode(kgem, KGEM_BLT); if (!kgem_check_batch(kgem, 8) || - !kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL) || - kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem)) + !kgem_check_reloc(kgem, 2) || + !kgem_check_bo_fenced(kgem, dst_bo, src_bo, NULL)) _kgem_submit(kgem); do { diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h index b4b40857..0f174f67 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -1,9 +1,9 @@ #ifndef SNA_RENDER_H #define SNA_RENDER_H -#define GRADIENT_CACHE_SIZE 16 +#include "compiler.h" -#define fastcall __attribute__((regparm(3))) +#define GRADIENT_CACHE_SIZE 16 struct sna; struct sna_glyph; diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c index 1549ef50..9801121e 100644 --- a/src/sna/sna_trapezoids.c +++ b/src/sna/sna_trapezoids.c @@ -52,8 +52,6 @@ /* TODO: Emit unantialiased and MSAA triangles. */ -#define unlikely(x) x - #ifndef MAX #define MAX(x,y) ((x) >= (y) ? (x) : (y)) #endif |