diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2011-04-08 07:17:14 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2011-06-04 09:19:46 +0100 |
commit | bcef98af561939aa48d9236b2dfa2c5626adf4cb (patch) | |
tree | 9d05558947a97595a6fdece968b50eeae45bbfb1 /src/sna/sna_trapezoids.c | |
parent | 340cfb7f5271fd1df4c8948e5c9336f5b69a6e6c (diff) |
sna: Introduce a new acceleration model.
The premise is that switching between rings (i.e. the BLT and
RENDER rings) on SandyBridge imposes a large latency overhead whilst
rendering. The cause is that in order to switch rings, we need to split
the batch earlier than is desired and to add serialisation between the
rings. Both of which incur large overhead.
By switching to using a pure 3D blit engine (ok, not so pure as the BLT
engine still has uses for the core drawing model which can not be easily
represented without a combinatorial explosion of shaders) we can take
advantage of additional efficiencies, such as relative relocations, that
have been incorporated into recent hardware advances. However, even
older hardware performs better from avoiding the implicit context
switches and from the batching efficiency of the 3D pipeline...
But this is X, and PolyGlyphBlt still exists and remains in use. So for
the operations that are not worth accelerating in hardware, we introduce a
shadow buffer mechanism through out and reintroduce pixmap migration.
Doing this efficiently is the cornerstone of ensuring that we do exploit
the increased potential of recent hardware for running old applications and
environments (i.e. so that the latest and greatest chip is actually faster
than gen2!)
For the curious, sna is SandyBridge's New Acceleration. If you are
running older chipsets and welcome the performance increase offered by
this patch, then you may choose to call it Snazzy instead.
Speedups
========
gen3 firefox-fishtank 1203584.56 (1203842.75 0.01%) -> 85561.71 (125146.44 14.87%): 14.07x speedup
gen5 grads-heat-map 3385.42 (3489.73 1.44%) -> 350.29 (350.75 0.18%): 9.66x speedup
gen3 xfce4-terminal-a1 4179.02 (4180.09 0.06%) -> 503.90 (531.88 4.48%): 8.29x speedup
gen4 grads-heat-map 2458.66 (2826.34 4.64%) -> 348.82 (349.20 0.29%): 7.05x speedup
gen3 grads-heat-map 1443.33 (1445.32 0.09%) -> 298.55 (298.76 0.05%): 4.83x speedup
gen3 swfdec-youtube 3836.14 (3894.14 0.95%) -> 889.84 (979.56 5.99%): 4.31x speedup
gen6 grads-heat-map 742.11 (744.44 0.15%) -> 172.51 (172.93 0.20%): 4.30x speedup
gen3 firefox-talos-svg 71740.44 (72370.13 0.59%) -> 21959.29 (21995.09 0.68%): 3.27x speedup
gen5 gvim 8045.51 (8071.47 0.17%) -> 2589.38 (3246.78 10.74%): 3.11x speedup
gen6 poppler 3800.78 (3817.92 0.24%) -> 1227.36 (1230.12 0.30%): 3.10x speedup
gen6 gnome-terminal-vim 9106.84 (9111.56 0.03%) -> 3459.49 (3478.52 0.25%): 2.63x speedup
gen5 midori-zoomed 9564.53 (9586.58 0.17%) -> 3677.73 (3837.02 2.02%): 2.60x speedup
gen5 gnome-terminal-vim 38167.25 (38215.82 0.08%) -> 14901.09 (14902.28 0.01%): 2.56x speedup
gen5 poppler 13575.66 (13605.04 0.16%) -> 5554.27 (5555.84 0.01%): 2.44x speedup
gen5 swfdec-giant-steps 8941.61 (8988.72 0.52%) -> 3851.98 (3871.01 0.93%): 2.32x speedup
gen5 xfce4-terminal-a1 18956.60 (18986.90 0.07%) -> 8362.75 (8365.70 0.01%): 2.27x speedup
gen5 firefox-fishtank 88750.31 (88858.23 0.14%) -> 39164.57 (39835.54 0.80%): 2.27x speedup
gen3 midori-zoomed 2392.13 (2397.82 0.14%) -> 1109.96 (1303.10 30.35%): 2.16x speedup
gen6 gvim 2510.34 (2513.34 0.20%) -> 1200.76 (1204.30 0.22%): 2.09x speedup
gen5 firefox-planet-gnome 40478.16 (40565.68 0.09%) -> 19606.22 (19648.79 0.16%): 2.06x speedup
gen5 gnome-system-monitor 10344.47 (10385.62 0.29%) -> 5136.69 (5256.85 1.15%): 2.01x speedup
gen3 poppler 2595.23 (2603.10 0.17%) -> 1297.56 (1302.42 0.61%): 2.00x speedup
gen6 firefox-talos-gfx 7184.03 (7194.97 0.13%) -> 3806.31 (3811.66 0.06%): 1.89x speedup
gen5 evolution 8739.25 (8766.12 0.27%) -> 4817.54 (5050.96 1.54%): 1.81x speedup
gen3 evolution 1684.06 (1696.88 0.35%) -> 1004.99 (1008.55 0.85%): 1.68x speedup
gen3 gnome-terminal-vim 4285.13 (4287.68 0.04%) -> 2715.97 (3202.17 13.52%): 1.58x speedup
gen5 swfdec-youtube 5843.94 (5951.07 0.91%) -> 3810.86 (3826.04 1.32%): 1.53x speedup
gen4 poppler 7496.72 (7558.83 0.58%) -> 5125.08 (5247.65 1.44%): 1.46x speedup
gen4 gnome-terminal-vim 21126.24 (21292.08 0.85%) -> 14590.25 (15066.33 1.80%): 1.45x speedup
gen5 firefox-talos-svg 99873.69 (100300.95 0.37%) -> 70745.66 (70818.86 0.05%): 1.41x speedup
gen4 firefox-planet-gnome 28205.10 (28304.45 0.27%) -> 19996.11 (20081.44 0.56%): 1.41x speedup
gen5 firefox-talos-gfx 93070.85 (93194.72 0.10%) -> 67687.93 (70374.37 1.30%): 1.37x speedup
gen4 evolution 6696.25 (6854.14 0.85%) -> 4958.62 (5027.73 0.85%): 1.35x speedup
gen3 swfdec-giant-steps 2538.03 (2539.30 0.04%) -> 1895.71 (2050.62 62.43%): 1.34x speedup
gen4 gvim 4356.18 (4422.78 0.70%) -> 3276.31 (3281.69 0.13%): 1.33x speedup
gen6 evolution 1242.13 (1245.44 0.72%) -> 953.76 (954.54 0.07%): 1.30x speedup
gen6 firefox-planet-gnome 4554.23 (4560.69 0.08%) -> 3758.76 (3768.97 0.28%): 1.21x speedup
gen3 firefox-talos-gfx 6264.13 (6284.65 0.30%) -> 5261.56 (5370.87 1.28%): 1.19x speedup
gen4 midori-zoomed 4771.13 (4809.90 0.73%) -> 4037.03 (4118.93 0.85%): 1.18x speedup
gen6 swfdec-giant-steps 1557.06 (1560.13 0.12%) -> 1336.34 (1341.29 0.32%): 1.17x speedup
gen4 firefox-talos-gfx 80767.28 (80986.31 0.17%) -> 69629.08 (69721.71 0.06%): 1.16x speedup
gen6 midori-zoomed 1463.70 (1463.76 0.08%) -> 1331.45 (1336.56 0.22%): 1.10x speedup
Slowdowns
=========
gen6 xfce4-terminal-a1 2030.25 (2036.23 0.25%) -> 2144.60 (2240.31 4.29%): 1.06x slowdown
gen4 swfdec-youtube 3580.00 (3597.23 3.92%) -> 3826.90 (3862.24 0.91%): 1.07x slowdown
gen4 firefox-talos-svg 66112.25 (66256.51 0.11%) -> 71433.40 (71584.31 0.14%): 1.08x slowdown
gen4 gnome-system-monitor 5691.60 (5724.03 0.56%) -> 6707.56 (6747.83 0.33%): 1.18x slowdown
gen3 ocitysmap 3494.05 (3502.44 0.20%) -> 4321.99 (4524.42 2.78%): 1.24x slowdown
gen4 ocitysmap 3628.42 (3641.66 9.37%) -> 5177.16 (5828.74 8.38%): 1.43x slowdown
gen5 ocitysmap 4027.77 (4068.11 0.80%) -> 5748.26 (6282.25 7.38%): 1.43x slowdown
gen6 ocitysmap 1401.61 (1402.24 0.40%) -> 2365.74 (2379.14 4.12%): 1.69x slowdown
[Note the performance regression for ocitysmap comes from that we now
attempt to support rendering to and (more importantly) from large
surfaces. By enabling such operations is the only way to one day be
faster than purely using the CPU, in the meantime we suffer regression
due to the increased migration and aperture thrashing. The other couple
of regressions will be eliminated with improved span and shader support,
now that the framework for such is in place.]
The performance increase for Cairo completely overlooks the other
critical aspects of the architecture:
World of Padman:
gen3 (800x600): 57.5 -> 96.2
gen4 (800x600): 47.8 -> 74.6
gen6 (1366x768): 100.4 -> 140.3 [F15]
144.3 -> 146.4 [drm-intel-next]
x11perf (gen6);
aa10text: 3.47 -> 14.3 Mglyphs/s [unthrottled!]
copywinwin10: 1.66 -> 1.99 Mops/s
copywinpix10: 2.28 -> 2.98 Mops/s
And we do not have a good measure for how much improvement the reworking
of the fallback paths give, except that xterm is now over 4x faster...
PS: This depends upon the Xorg patchset "Remove the cacheing of the last
scratch PixmapRec" for correct invalidations of scratch Pixmaps (used by
the dix to implement SHM operations, used by chromium and gtk+ pixbufs.
PPS: ./configure --enable-sna
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/sna/sna_trapezoids.c')
-rw-r--r-- | src/sna/sna_trapezoids.c | 2375 |
1 files changed, 2375 insertions, 0 deletions
diff --git a/src/sna/sna_trapezoids.c b/src/sna/sna_trapezoids.c new file mode 100644 index 00000000..db9e085b --- /dev/null +++ b/src/sna/sna_trapezoids.c @@ -0,0 +1,2375 @@ +/* + * Copyright (c) 2007 David Turner + * Copyright (c) 2008 M Joonas Pihlaja + * Copyright (c) 2011 Intel Corporation + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Chris Wilson <chris@chris-wilson.co.uk> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "sna.h" +#include "sna_render.h" +#include "sna_render_inline.h" + +#include <mipict.h> +#include <fbpict.h> + +#if DEBUG_TRAPEZOIDS +#undef DBG +#define DBG(x) ErrorF x +#else +#define NDEBUG 1 +#endif + +#define NO_ACCEL 0 + +#define unlikely(x) x + +#define SAMPLES_X 17 +#define SAMPLES_Y 15 + +#define FAST_SAMPLES_X_shift 8 +#define FAST_SAMPLES_Y_shift 4 + +#define FAST_SAMPLES_X (1<<FAST_SAMPLES_X_shift) +#define FAST_SAMPLES_Y (1<<FAST_SAMPLES_Y_shift) + +#if DEBUG_TRAPEZOIDS +static void _assert_pixmap_contains_box(PixmapPtr pixmap, BoxPtr box, const char *function) +{ + if (box->x1 < 0 || box->y1 < 0 || + box->x2 > pixmap->drawable.width || + box->y2 > pixmap->drawable.height) + { + ErrorF("%s: damage box is beyond the pixmap: box=(%d, %d), (%d, %d), pixmap=(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, + pixmap->drawable.width, + pixmap->drawable.height); + assert(0); + } +} +#define assert_pixmap_contains_box(p, b) _assert_pixmap_contains_box(p, b, __FUNCTION__) +#else +#define assert_pixmap_contains_box(p, b) +#endif + +static void apply_damage(struct sna_composite_op *op, RegionPtr region) +{ + DBG(("%s: damage=%p, region=%d\n", + __FUNCTION__, op->damage, REGION_NUM_RECTS(region))); + + if (op->damage == NULL) + return; + + RegionTranslate(region, op->dst.x, op->dst.y); + + assert_pixmap_contains_box(op->dst.pixmap, RegionExtents(region)); + sna_damage_add(op->damage, region); +} + +static void apply_damage_box(struct sna_composite_op *op, const BoxRec *box) +{ + BoxRec r; + + if (op->damage == NULL) + return; + + r.x1 = box->x1 + op->dst.x; + r.x2 = box->x2 + op->dst.x; + r.y1 = box->y1 + op->dst.y; + r.y2 = box->y2 + op->dst.y; + + assert_pixmap_contains_box(op->dst.pixmap, &r); + sna_damage_add_box(op->damage, &r); +} + +typedef int grid_scaled_x_t; +typedef int grid_scaled_y_t; + +#define FAST_SAMPLES_X_TO_INT_FRAC(x, i, f) \ + _GRID_TO_INT_FRAC_shift(x, i, f, FAST_SAMPLES_X_shift) + +#define _GRID_TO_INT_FRAC_shift(t, i, f, b) do { \ + (f) = (t) & ((1 << (b)) - 1); \ + (i) = (t) >> (b); \ +} while (0) + +/* A grid area is a real in [0,1] scaled by 2*SAMPLES_X*SAMPLES_Y. We want + * to be able to represent exactly areas of subpixel trapezoids whose + * vertices are given in grid scaled coordinates. The scale factor + * comes from needing to accurately represent the area 0.5*dx*dy of a + * triangle with base dx and height dy in grid scaled numbers. */ +typedef int grid_area_t; +#define FAST_SAMPLES_XY (2*FAST_SAMPLES_X*FAST_SAMPLES_Y) /* Unit area on the grid. */ + +#define AREA_TO_ALPHA(c) ((c) / (float)FAST_SAMPLES_XY) + +struct quorem { + int32_t quo; + int32_t rem; +}; + +struct _pool_chunk { + size_t size; + size_t capacity; + + struct _pool_chunk *prev_chunk; + /* Actual data starts here. Well aligned for pointers. */ +}; + +/* A memory pool. This is supposed to be embedded on the stack or + * within some other structure. It may optionally be followed by an + * embedded array from which requests are fulfilled until + * malloc needs to be called to allocate a first real chunk. */ +struct pool { + struct _pool_chunk *current; + struct _pool_chunk *first_free; + + /* The default capacity of a chunk. */ + size_t default_capacity; + + /* Header for the sentinel chunk. Directly following the pool + * struct should be some space for embedded elements from which + * the sentinel chunk allocates from. */ + struct _pool_chunk sentinel[1]; +}; + +/* A polygon edge. */ +struct edge { + /* Next in y-bucket or active list. */ + struct edge *next; + + /* Current x coordinate while the edge is on the active + * list. Initialised to the x coordinate of the top of the + * edge. The quotient is in grid_scaled_x_t units and the + * remainder is mod dy in grid_scaled_y_t units.*/ + struct quorem x; + + /* Advance of the current x when moving down a subsample line. */ + struct quorem dxdy; + + /* Advance of the current x when moving down a full pixel + * row. Only initialised when the height of the edge is large + * enough that there's a chance the edge could be stepped by a + * full row's worth of subsample rows at a time. */ + struct quorem dxdy_full; + + /* The clipped y of the top of the edge. */ + grid_scaled_y_t ytop; + + /* y2-y1 after orienting the edge downwards. */ + grid_scaled_y_t dy; + + /* Number of subsample rows remaining to scan convert of this + * edge. */ + grid_scaled_y_t height_left; + + /* Original sign of the edge: +1 for downwards, -1 for upwards + * edges. */ + int dir; + int vertical; +}; + +/* Number of subsample rows per y-bucket. Must be SAMPLES_Y. */ +#define EDGE_Y_BUCKET_HEIGHT FAST_SAMPLES_Y +#define EDGE_Y_BUCKET_INDEX(y, ymin) (((y) - (ymin))/EDGE_Y_BUCKET_HEIGHT) + +/* A collection of sorted and vertically clipped edges of the polygon. + * Edges are moved from the polygon to an active list while scan + * converting. */ +struct polygon { + /* The vertical clip extents. */ + grid_scaled_y_t ymin, ymax; + + /* Array of edges all starting in the same bucket. An edge is put + * into bucket EDGE_BUCKET_INDEX(edge->ytop, polygon->ymin) when + * it is added to the polygon. */ + struct edge **y_buckets; + struct edge *y_buckets_embedded[64]; + + struct edge edges_embedded[32]; + struct edge *edges; + int num_edges; +}; + +/* A cell records the effect on pixel coverage of polygon edges + * passing through a pixel. It contains two accumulators of pixel + * coverage. + * + * Consider the effects of a polygon edge on the coverage of a pixel + * it intersects and that of the following one. The coverage of the + * following pixel is the height of the edge multiplied by the width + * of the pixel, and the coverage of the pixel itself is the area of + * the trapezoid formed by the edge and the right side of the pixel. + * + * +-----------------------+-----------------------+ + * | | | + * | | | + * |_______________________|_______________________| + * | \...................|.......................|\ + * | \..................|.......................| | + * | \.................|.......................| | + * | \....covered.....|.......................| | + * | \....area.......|.......................| } covered height + * | \..............|.......................| | + * |uncovered\.............|.......................| | + * | area \............|.......................| | + * |___________\...........|.......................|/ + * | | | + * | | | + * | | | + * +-----------------------+-----------------------+ + * + * Since the coverage of the following pixel will always be a multiple + * of the width of the pixel, we can store the height of the covered + * area instead. The coverage of the pixel itself is the total + * coverage minus the area of the uncovered area to the left of the + * edge. As it's faster to compute the uncovered area we only store + * that and subtract it from the total coverage later when forming + * spans to blit. + * + * The heights and areas are signed, with left edges of the polygon + * having positive sign and right edges having negative sign. When + * two edges intersect they swap their left/rightness so their + * contribution above and below the intersection point must be + * computed separately. */ +struct cell { + struct cell *next; + int x; + grid_area_t uncovered_area; + grid_scaled_y_t covered_height; +}; + +/* A cell list represents the scan line sparsely as cells ordered by + * ascending x. It is geared towards scanning the cells in order + * using an internal cursor. */ +struct cell_list { + /* Points to the left-most cell in the scan line. */ + struct cell *head; + /* Sentinel node */ + struct cell tail; + + struct cell **cursor; + + /* Cells in the cell list are owned by the cell list and are + * allocated from this pool. */ + struct { + struct pool base[1]; + struct cell embedded[32]; + } cell_pool; +}; + +/* The active list contains edges in the current scan line ordered by + * the x-coordinate of the intercept of the edge and the scan line. */ +struct active_list { + /* Leftmost edge on the current scan line. */ + struct edge *head; + + /* A lower bound on the height of the active edges is used to + * estimate how soon some active edge ends. We can't advance the + * scan conversion by a full pixel row if an edge ends somewhere + * within it. */ + grid_scaled_y_t min_height; +}; + +struct tor { + struct polygon polygon[1]; + struct active_list active[1]; + struct cell_list coverages[1]; + + /* Clip box. */ + grid_scaled_x_t xmin, xmax; + grid_scaled_y_t ymin, ymax; +}; + +/* Compute the floored division a/b. Assumes / and % perform symmetric + * division. */ +inline static struct quorem +floored_divrem(int a, int b) +{ + struct quorem qr; + qr.quo = a/b; + qr.rem = a%b; + if ((a^b)<0 && qr.rem) { + qr.quo -= 1; + qr.rem += b; + } + return qr; +} + +/* Compute the floored division (x*a)/b. Assumes / and % perform symmetric + * division. */ +static struct quorem +floored_muldivrem(int x, int a, int b) +{ + struct quorem qr; + long long xa = (long long)x*a; + qr.quo = xa/b; + qr.rem = xa%b; + if ((xa>=0) != (b>=0) && qr.rem) { + qr.quo -= 1; + qr.rem += b; + } + return qr; +} + +static void +_pool_chunk_init( + struct _pool_chunk *p, + struct _pool_chunk *prev_chunk, + size_t capacity) +{ + p->prev_chunk = prev_chunk; + p->size = 0; + p->capacity = capacity; +} + +static struct _pool_chunk * +_pool_chunk_create(struct _pool_chunk *prev_chunk, size_t size) +{ + struct _pool_chunk *p; + size_t size_with_head = size + sizeof(struct _pool_chunk); + + if (size_with_head < size) + return NULL; + + p = malloc(size_with_head); + if (p) + _pool_chunk_init(p, prev_chunk, size); + + return p; +} + +static void +pool_init(struct pool *pool, + size_t default_capacity, + size_t embedded_capacity) +{ + pool->current = pool->sentinel; + pool->first_free = NULL; + pool->default_capacity = default_capacity; + _pool_chunk_init(pool->sentinel, NULL, embedded_capacity); +} + +static void +pool_fini(struct pool *pool) +{ + struct _pool_chunk *p = pool->current; + do { + while (NULL != p) { + struct _pool_chunk *prev = p->prev_chunk; + if (p != pool->sentinel) + free(p); + p = prev; + } + p = pool->first_free; + pool->first_free = NULL; + } while (NULL != p); + pool_init(pool, 0, 0); +} + +/* Satisfy an allocation by first allocating a new large enough chunk + * and adding it to the head of the pool's chunk list. This function + * is called as a fallback if pool_alloc() couldn't do a quick + * allocation from the current chunk in the pool. */ +static void * +_pool_alloc_from_new_chunk(struct pool *pool, size_t size) +{ + struct _pool_chunk *chunk; + void *obj; + size_t capacity; + + /* If the allocation is smaller than the default chunk size then + * try getting a chunk off the free list. Force alloc of a new + * chunk for large requests. */ + capacity = size; + chunk = NULL; + if (size < pool->default_capacity) { + capacity = pool->default_capacity; + chunk = pool->first_free; + if (chunk) { + pool->first_free = chunk->prev_chunk; + _pool_chunk_init(chunk, pool->current, chunk->capacity); + } + } + + if (NULL == chunk) { + chunk = _pool_chunk_create (pool->current, capacity); + if (unlikely (NULL == chunk)) + return NULL; + } + pool->current = chunk; + + obj = ((unsigned char*)chunk + sizeof(*chunk) + chunk->size); + chunk->size += size; + return obj; +} + +inline static void * +pool_alloc(struct pool *pool, size_t size) +{ + struct _pool_chunk *chunk = pool->current; + + if (size <= chunk->capacity - chunk->size) { + void *obj = ((unsigned char*)chunk + sizeof(*chunk) + chunk->size); + chunk->size += size; + return obj; + } else + return _pool_alloc_from_new_chunk(pool, size); +} + +static void +pool_reset(struct pool *pool) +{ + /* Transfer all used chunks to the chunk free list. */ + struct _pool_chunk *chunk = pool->current; + if (chunk != pool->sentinel) { + while (chunk->prev_chunk != pool->sentinel) + chunk = chunk->prev_chunk; + + chunk->prev_chunk = pool->first_free; + pool->first_free = pool->current; + } + + /* Reset the sentinel as the current chunk. */ + pool->current = pool->sentinel; + pool->sentinel->size = 0; +} + +/* Rewinds the cell list's cursor to the beginning. After rewinding + * we're good to cell_list_find() the cell any x coordinate. */ +inline static void +cell_list_rewind(struct cell_list *cells) +{ + cells->cursor = &cells->head; +} + +/* Rewind the cell list if its cursor has been advanced past x. */ +inline static void +cell_list_maybe_rewind(struct cell_list *cells, int x) +{ + if ((*cells->cursor)->x > x) + cell_list_rewind(cells); +} + +static void +cell_list_init(struct cell_list *cells) +{ + pool_init(cells->cell_pool.base, + 256*sizeof(struct cell), + sizeof(cells->cell_pool.embedded)); + cells->tail.next = NULL; + cells->tail.x = INT_MAX; + cells->head = &cells->tail; + cell_list_rewind(cells); +} + +static void +cell_list_fini(struct cell_list *cells) +{ + pool_fini(cells->cell_pool.base); +} + +inline static void +cell_list_reset(struct cell_list *cells) +{ + cell_list_rewind(cells); + cells->head = &cells->tail; + pool_reset(cells->cell_pool.base); +} + +static struct cell * +cell_list_alloc(struct cell_list *cells, + struct cell *tail, + int x) +{ + struct cell *cell; + + cell = pool_alloc(cells->cell_pool.base, sizeof (struct cell)); + if (unlikely(NULL == cell)) + abort(); + + *cells->cursor = cell; + cell->next = tail; + cell->x = x; + cell->uncovered_area = 0; + cell->covered_height = 0; + return cell; +} + +/* Find a cell at the given x-coordinate. Returns %NULL if a new cell + * needed to be allocated but couldn't be. Cells must be found with + * non-decreasing x-coordinate until the cell list is rewound using + * cell_list_rewind(). Ownership of the returned cell is retained by + * the cell list. */ +inline static struct cell * +cell_list_find(struct cell_list *cells, int x) +{ + struct cell **cursor = cells->cursor; + struct cell *cell; + + do { + cell = *cursor; + if (cell->x >= x) + break; + + cursor = &cell->next; + } while (1); + cells->cursor = cursor; + + if (cell->x == x) + return cell; + + return cell_list_alloc(cells, cell, x); +} + +/* Add an unbounded subpixel span covering subpixels >= x to the + * coverage cells. */ +static void +cell_list_add_unbounded_subspan(struct cell_list *cells, grid_scaled_x_t x) +{ + struct cell *cell; + int ix, fx; + + FAST_SAMPLES_X_TO_INT_FRAC(x, ix, fx); + + DBG(("%s: x=%d (%d+%d)\n", __FUNCTION__, x, ix, fx)); + + cell = cell_list_find(cells, ix); + cell->uncovered_area += 2*fx; + cell->covered_height++; +} + +/* Add a subpixel span covering [x1, x2) to the coverage cells. */ +inline static void +cell_list_add_subspan(struct cell_list *cells, + grid_scaled_x_t x1, + grid_scaled_x_t x2) +{ + struct cell *cell; + int ix1, fx1; + int ix2, fx2; + + FAST_SAMPLES_X_TO_INT_FRAC(x1, ix1, fx1); + FAST_SAMPLES_X_TO_INT_FRAC(x2, ix2, fx2); + + DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__, + x1, ix1, fx1, x2, ix2, fx2)); + + cell = cell_list_find(cells, ix1); + if (ix1 != ix2) { + cell->uncovered_area += 2*fx1; + ++cell->covered_height; + + cell = cell_list_find(cells, ix2); + cell->uncovered_area -= 2*fx2; + --cell->covered_height; + } else + cell->uncovered_area += 2*(fx1-fx2); +} + +/* Adds the analytical coverage of an edge crossing the current pixel + * row to the coverage cells and advances the edge's x position to the + * following row. + * + * This function is only called when we know that during this pixel row: + * + * 1) The relative order of all edges on the active list doesn't + * change. In particular, no edges intersect within this row to pixel + * precision. + * + * 2) No new edges start in this row. + * + * 3) No existing edges end mid-row. + * + * This function depends on being called with all edges from the + * active list in the order they appear on the list (i.e. with + * non-decreasing x-coordinate.) */ +static void +cell_list_render_edge(struct cell_list *cells, struct edge *edge, int sign) +{ + grid_scaled_y_t y1, y2, dy; + grid_scaled_x_t fx1, fx2, dx; + int ix1, ix2; + struct quorem x1 = edge->x; + struct quorem x2 = x1; + + if (!edge->vertical) { + x2.quo += edge->dxdy_full.quo; + x2.rem += edge->dxdy_full.rem; + if (x2.rem >= 0) { + ++x2.quo; + x2.rem -= edge->dy; + } + + edge->x = x2; + } + + FAST_SAMPLES_X_TO_INT_FRAC(x1.quo, ix1, fx1); + FAST_SAMPLES_X_TO_INT_FRAC(x2.quo, ix2, fx2); + + DBG(("%s: x1=%d (%d+%d), x2=%d (%d+%d)\n", __FUNCTION__, + x1.quo, ix1, fx1, x2.quo, ix2, fx2)); + + /* Edge is entirely within a column? */ + if (ix1 == ix2) { + /* We always know that ix1 is >= the cell list cursor in this + * case due to the no-intersections precondition. */ + struct cell *cell = cell_list_find(cells, ix1); + cell->covered_height += sign*FAST_SAMPLES_Y; + cell->uncovered_area += sign*(fx1 + fx2)*FAST_SAMPLES_Y; + return; + } + + /* Orient the edge left-to-right. */ + dx = x2.quo - x1.quo; + if (dx >= 0) { + y1 = 0; + y2 = FAST_SAMPLES_Y; + } else { + int tmp; + tmp = ix1; ix1 = ix2; ix2 = tmp; + tmp = fx1; fx1 = fx2; fx2 = tmp; + dx = -dx; + sign = -sign; + y1 = FAST_SAMPLES_Y; + y2 = 0; + } + dy = y2 - y1; + + /* Add coverage for all pixels [ix1,ix2] on this row crossed + * by the edge. */ + { + struct quorem y = floored_divrem((FAST_SAMPLES_X - fx1)*dy, dx); + struct cell *cell; + + /* When rendering a previous edge on the active list we may + * advance the cell list cursor past the leftmost pixel of the + * current edge even though the two edges don't intersect. + * e.g. consider two edges going down and rightwards: + * + * --\_+---\_+-----+-----+---- + * \_ \_ | | + * | \_ | \_ | | + * | \_| \_| | + * | \_ \_ | + * ----+-----+-\---+-\---+---- + * + * The left edge touches cells past the starting cell of the + * right edge. Fortunately such cases are rare. + * + * The rewinding is never necessary if the current edge stays + * within a single column because we've checked before calling + * this function that the active list order won't change. */ + cell_list_maybe_rewind(cells, ix1); + + cell = cell_list_find(cells, ix1); + cell->uncovered_area += sign*y.quo*(FAST_SAMPLES_X + fx1); + cell->covered_height += sign*y.quo; + y.quo += y1; + + cell = cell_list_find(cells, ++ix1); + if (ix1 < ix2) { + struct quorem dydx_full = floored_divrem(FAST_SAMPLES_X*dy, dx); + do { + grid_scaled_y_t y_skip = dydx_full.quo; + y.rem += dydx_full.rem; + if (y.rem >= dx) { + ++y_skip; + y.rem -= dx; + } + + y.quo += y_skip; + + y_skip *= sign; + cell->uncovered_area += y_skip*FAST_SAMPLES_X; + cell->covered_height += y_skip; + + cell = cell_list_find(cells, ++ix1); + } while (ix1 != ix2); + } + cell->uncovered_area += sign*(y2 - y.quo)*fx2; + cell->covered_height += sign*(y2 - y.quo); + } +} + + +static void +polygon_fini(struct polygon *polygon) +{ + if (polygon->y_buckets != polygon->y_buckets_embedded) + free(polygon->y_buckets); + + if (polygon->edges != polygon->edges_embedded) + free(polygon->edges); +} + +static int +polygon_init(struct polygon *polygon, + int num_edges, + grid_scaled_y_t ymin, + grid_scaled_y_t ymax) +{ + unsigned h = ymax - ymin; + unsigned num_buckets = EDGE_Y_BUCKET_INDEX(ymax+EDGE_Y_BUCKET_HEIGHT-1, + ymin); + + if (unlikely(h > 0x7FFFFFFFU - EDGE_Y_BUCKET_HEIGHT)) + goto bail_no_mem; /* even if you could, you wouldn't want to. */ + + polygon->edges = polygon->edges_embedded; + polygon->y_buckets = polygon->y_buckets_embedded; + + polygon->num_edges = 0; + if (num_edges > ARRAY_SIZE(polygon->edges_embedded)) { + polygon->edges = malloc(sizeof(struct edge)*num_edges); + if (unlikely(NULL == polygon->edges)) + goto bail_no_mem; + } + + if (num_buckets > ARRAY_SIZE(polygon->y_buckets_embedded)) { + polygon->y_buckets = malloc(num_buckets*sizeof(struct edge *)); + if (unlikely(NULL == polygon->y_buckets)) + goto bail_no_mem; + } + memset(polygon->y_buckets, 0, num_buckets * sizeof(struct edge *)); + + polygon->ymin = ymin; + polygon->ymax = ymax; + return 0; + +bail_no_mem: + polygon_fini(polygon); + return -1; +} + +static void +_polygon_insert_edge_into_its_y_bucket(struct polygon *polygon, struct edge *e) +{ + unsigned ix = EDGE_Y_BUCKET_INDEX(e->ytop, polygon->ymin); + struct edge **ptail = &polygon->y_buckets[ix]; + e->next = *ptail; + *ptail = e; +} + +inline static void +polygon_add_edge(struct polygon *polygon, + grid_scaled_x_t x1, + grid_scaled_x_t x2, + grid_scaled_y_t y1, + grid_scaled_y_t y2, + grid_scaled_y_t top, + grid_scaled_y_t bottom, + int dir) +{ + struct edge *e = &polygon->edges[polygon->num_edges++]; + grid_scaled_x_t dx = x2 - x1; + grid_scaled_y_t dy = y2 - y1; + grid_scaled_y_t ytop, ybot; + grid_scaled_y_t ymin = polygon->ymin; + grid_scaled_y_t ymax = polygon->ymax; + + e->dy = dy; + e->dir = dir; + + ytop = top >= ymin ? top : ymin; + ybot = bottom <= ymax ? bottom : ymax; + e->ytop = ytop; + e->height_left = ybot - ytop; + + if (dx == 0) { + e->vertical = true; + e->x.quo = x1; + e->x.rem = 0; + e->dxdy.quo = 0; + e->dxdy.rem = 0; + e->dxdy_full.quo = 0; + e->dxdy_full.rem = 0; + } else { + e->vertical = false; + e->dxdy = floored_divrem(dx, dy); + if (ytop == y1) { + e->x.quo = x1; + e->x.rem = 0; + } else { + e->x = floored_muldivrem(ytop - y1, dx, dy); + e->x.quo += x1; + } + + if (e->height_left >= FAST_SAMPLES_Y) { + e->dxdy_full = floored_muldivrem(FAST_SAMPLES_Y, dx, dy); + } else { + e->dxdy_full.quo = 0; + e->dxdy_full.rem = 0; + } + } + + _polygon_insert_edge_into_its_y_bucket(polygon, e); + + e->x.rem -= dy; /* Bias the remainder for faster edge advancement. */ +} + +static void +active_list_reset(struct active_list *active) +{ + active->head = NULL; + active->min_height = 0; +} + +/* + * Merge two sorted edge lists. + * Input: + * - head_a: The head of the first list. + * - head_b: The head of the second list; head_b cannot be NULL. + * Output: + * Returns the head of the merged list. + * + * Implementation notes: + * To make it fast (in particular, to reduce to an insertion sort whenever + * one of the two input lists only has a single element) we iterate through + * a list until its head becomes greater than the head of the other list, + * then we switch their roles. As soon as one of the two lists is empty, we + * just attach the other one to the current list and exit. + * Writes to memory are only needed to "switch" lists (as it also requires + * attaching to the output list the list which we will be iterating next) and + * to attach the last non-empty list. + */ +static struct edge * +merge_sorted_edges(struct edge *head_a, struct edge *head_b) +{ + struct edge *head, **next; + + head = head_a; + next = &head; + + while (1) { + while (head_a != NULL && head_a->x.quo <= head_b->x.quo) { + next = &head_a->next; + head_a = head_a->next; + } + + *next = head_b; + if (head_a == NULL) + return head; + + while (head_b != NULL && head_b->x.quo <= head_a->x.quo) { + next = &head_b->next; + head_b = head_b->next; + } + + *next = head_a; + if (head_b == NULL) + return head; + } +} + +/* + * Sort (part of) a list. + * Input: + * - list: The list to be sorted; list cannot be NULL. + * - limit: Recursion limit. + * Output: + * - head_out: The head of the sorted list containing the first 2^(level+1) elements of the + * input list; if the input list has fewer elements, head_out be a sorted list + * containing all the elements of the input list. + * Returns the head of the list of unprocessed elements (NULL if the sorted list contains + * all the elements of the input list). + * + * Implementation notes: + * Special case single element list, unroll/inline the sorting of the first two elements. + * Some tail recursion is used since we iterate on the bottom-up solution of the problem + * (we start with a small sorted list and keep merging other lists of the same size to it). + */ +static struct edge * +sort_edges(struct edge *list, + unsigned int level, + struct edge **head_out) +{ + struct edge *head_other, *remaining; + unsigned int i; + + head_other = list->next; + + /* Single element list -> return */ + if (head_other == NULL) { + *head_out = list; + return NULL; + } + + /* Unroll the first iteration of the following loop (halves the number of calls to merge_sorted_edges): + * - Initialize remaining to be the list containing the elements after the second in the input list. + * - Initialize *head_out to be the sorted list containing the first two element. + */ + remaining = head_other->next; + if (list->x.quo <= head_other->x.quo) { + *head_out = list; + /* list->next = head_other; */ /* The input list is already like this. */ + head_other->next = NULL; + } else { + *head_out = head_other; + head_other->next = list; + list->next = NULL; + } + + for (i = 0; i < level && remaining; i++) { + /* Extract a sorted list of the same size as *head_out + * (2^(i+1) elements) from the list of remaining elements. */ + remaining = sort_edges(remaining, i, &head_other); + *head_out = merge_sorted_edges(*head_out, head_other); + } + + /* *head_out now contains (at most) 2^(level+1) elements. */ + + return remaining; +} + +/* Test if the edges on the active list can be safely advanced by a + * full row without intersections or any edges ending. */ +inline static bool +active_list_can_step_full_row(struct active_list *active) +{ + const struct edge *e; + int prev_x = INT_MIN; + + /* Recomputes the minimum height of all edges on the active + * list if we have been dropping edges. */ + if (active->min_height <= 0) { + int min_height = INT_MAX; + + e = active->head; + while (NULL != e) { + if (e->height_left < min_height) + min_height = e->height_left; + e = e->next; + } + + active->min_height = min_height; + } + + if (active->min_height < FAST_SAMPLES_Y) + return false; + + /* Check for intersections as no edges end during the next row. */ + e = active->head; + while (NULL != e) { + struct quorem x = e->x; + + if (!e->vertical) { + x.quo += e->dxdy_full.quo; + x.rem += e->dxdy_full.rem; + if (x.rem >= 0) + ++x.quo; + } + + if (x.quo <= prev_x) + return false; + + prev_x = x.quo; + e = e->next; + } + + return true; +} + +/* Merges edges on the given subpixel row from the polygon to the + * active_list. */ +inline static void +merge_edges(struct active_list *active, + grid_scaled_y_t y, + struct edge **ptail) +{ + /* Split off the edges on the current subrow and merge them into + * the active list. */ + int min_height = active->min_height; + struct edge *subrow_edges = NULL; + + do { + struct edge *tail = *ptail; + if (NULL == tail) + break; + + if (y == tail->ytop) { + *ptail = tail->next; + tail->next = subrow_edges; + subrow_edges = tail; + if (tail->height_left < min_height) + min_height = tail->height_left; + } else + ptail = &tail->next; + } while (1); + + if (subrow_edges) { + sort_edges(subrow_edges, UINT_MAX, &subrow_edges); + active->head = merge_sorted_edges(active->head, subrow_edges); + active->min_height = min_height; + } +} + +/* Advance the edges on the active list by one subsample row by + * updating their x positions. Drop edges from the list that end. */ +inline static void +substep_edges(struct active_list *active) +{ + struct edge **cursor = &active->head; + grid_scaled_x_t prev_x = INT_MIN; + struct edge *unsorted = NULL; + + do { + struct edge *edge = *cursor; + if (NULL == edge) + break; + + if (0 != --edge->height_left) { + edge->x.quo += edge->dxdy.quo; + edge->x.rem += edge->dxdy.rem; + if (edge->x.rem >= 0) { + ++edge->x.quo; + edge->x.rem -= edge->dy; + } + + if (edge->x.quo < prev_x) { + *cursor = edge->next; + edge->next = unsorted; + unsorted = edge; + } else { + prev_x = edge->x.quo; + cursor = &edge->next; + } + } else + *cursor = edge->next; + } while (1); + + if (unsorted) { + sort_edges(unsorted, UINT_MAX, &unsorted); + active->head = merge_sorted_edges(active->head, unsorted); + } +} + +inline static void +apply_nonzero_fill_rule_for_subrow(struct active_list *active, + struct cell_list *coverages) +{ + struct edge *edge = active->head; + int winding = 0; + int xstart; + int xend; + + cell_list_rewind (coverages); + + while (NULL != edge) { + xstart = edge->x.quo; + winding = edge->dir; + while (1) { + edge = edge->next; + if (NULL == edge) + return cell_list_add_unbounded_subspan(coverages, xstart); + + winding += edge->dir; + if (0 == winding) { + if (edge->next == NULL || + edge->next->x.quo != edge->x.quo) + break; + } + } + + xend = edge->x.quo; + cell_list_add_subspan(coverages, xstart, xend); + + edge = edge->next; + } +} + +static void +apply_nonzero_fill_rule_and_step_edges(struct active_list *active, + struct cell_list *coverages) +{ + struct edge **cursor = &active->head; + struct edge *left_edge; + + left_edge = *cursor; + while (NULL != left_edge) { + struct edge *right_edge; + int winding = left_edge->dir; + + left_edge->height_left -= FAST_SAMPLES_Y; + if (left_edge->height_left) + cursor = &left_edge->next; + else + *cursor = left_edge->next; + + do { + right_edge = *cursor; + if (NULL == right_edge) + return cell_list_render_edge(coverages, + left_edge, + +1); + + right_edge->height_left -= FAST_SAMPLES_Y; + if (right_edge->height_left) + cursor = &right_edge->next; + else + *cursor = right_edge->next; + + winding += right_edge->dir; + if (0 == winding) { + if (right_edge->next == NULL || + right_edge->next->x.quo != right_edge->x.quo) + break; + } + + if (!right_edge->vertical) { + right_edge->x.quo += right_edge->dxdy_full.quo; + right_edge->x.rem += right_edge->dxdy_full.rem; + if (right_edge->x.rem >= 0) { + ++right_edge->x.quo; + right_edge->x.rem -= right_edge->dy; + } + } + } while (1); + + cell_list_render_edge(coverages, left_edge, +1); + cell_list_render_edge(coverages, right_edge, -1); + + left_edge = *cursor; + } +} + +static void +tor_fini(struct tor *converter) +{ + polygon_fini(converter->polygon); + cell_list_fini(converter->coverages); +} + +static int +tor_init(struct tor *converter, const BoxRec *box, int num_edges) +{ + DBG(("%s: (%d, %d),(%d, %d) x (%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, box->x2, box->y2, + FAST_SAMPLES_X, FAST_SAMPLES_Y)); + + converter->xmin = box->x1; + converter->ymin = box->y1; + converter->xmax = box->x2; + converter->ymax = box->y2; + + cell_list_init(converter->coverages); + active_list_reset(converter->active); + return polygon_init(converter->polygon, + num_edges, + box->y1 * FAST_SAMPLES_Y, + box->y2 * FAST_SAMPLES_Y); +} + +static void +tor_add_edge(struct tor *converter, + int dx, int dy, + int top, int bottom, + const xLineFixed *edge, + int dir) +{ + int x1, x2; + int y1, y2; + + y1 = dy + (edge->p1.y >> (16 - FAST_SAMPLES_Y_shift)); + y2 = dy + (edge->p2.y >> (16 - FAST_SAMPLES_Y_shift)); + if (y1 == y2) + return; + + x1 = dx + (edge->p1.x >> (16 - FAST_SAMPLES_X_shift)); + x2 = dx + (edge->p2.x >> (16 - FAST_SAMPLES_X_shift)); + + DBG(("%s: edge=(%d, %d), (%d, %d), top=%d, bottom=%d, dir=%d\n", + __FUNCTION__, x1, y1, x2, y2, top, bottom, dir)); + polygon_add_edge(converter->polygon, + x1, x2, + y1, y2, + top, bottom, + dir); +} + +static bool +active_list_is_vertical(struct active_list *active) +{ + struct edge *e; + + for (e = active->head; e != NULL; e = e->next) + if (!e->vertical) + return false; + + return true; +} + +static void +step_edges(struct active_list *active, int count) +{ + struct edge **cursor = &active->head; + struct edge *edge; + + for (edge = *cursor; edge != NULL; edge = *cursor) { + edge->height_left -= FAST_SAMPLES_Y * count; + if (edge->height_left) + cursor = &edge->next; + else + *cursor = edge->next; + } +} + +static void +tor_blt_span(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + DBG(("%s: %d -> %d @ %d\n", __FUNCTION__, box->x1, box->x2, coverage)); + + op->boxes(sna, op, box, 1, AREA_TO_ALPHA(coverage)); + apply_damage_box(&op->base, box); +} + +static void +tor_blt_span_clipped(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + pixman_region16_t region; + float opacity; + + opacity = AREA_TO_ALPHA(coverage); + DBG(("%s: %d -> %d @ %f\n", __FUNCTION__, box->x1, box->x2, opacity)); + + pixman_region_init_rects(®ion, box, 1); + RegionIntersect(®ion, ®ion, clip); + if (REGION_NUM_RECTS(®ion)) { + op->boxes(sna, op, + REGION_RECTS(®ion), + REGION_NUM_RECTS(®ion), + opacity); + apply_damage(&op->base, ®ion); + } + pixman_region_fini(®ion); +} + +static void +tor_blt_span_mono(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + if (coverage < FAST_SAMPLES_XY/2) + return; + + tor_blt_span(sna, op, clip, box, FAST_SAMPLES_XY); +} + +static void +tor_blt_span_mono_clipped(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + if (coverage < FAST_SAMPLES_XY/2) + return; + + tor_blt_span_clipped(sna, op, clip, box, FAST_SAMPLES_XY); +} + +static void +tor_blt_span_mono_unbounded(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + tor_blt_span(sna, op, clip, box, + coverage < FAST_SAMPLES_XY/2 ? 0 :FAST_SAMPLES_XY); +} + +static void +tor_blt_span_mono_unbounded_clipped(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage) +{ + tor_blt_span_clipped(sna, op, clip, box, + coverage < FAST_SAMPLES_XY/2 ? 0 :FAST_SAMPLES_XY); +} + +static void +tor_blt(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + void (*span)(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage), + struct cell_list *cells, + int y, int height, + int xmin, int xmax, + int unbounded) +{ + struct cell *cell = cells->head; + BoxRec box; + int cover = 0; + + /* Skip cells to the left of the clip region. */ + while (cell != NULL && cell->x < xmin) { + DBG(("%s: skipping cell (%d, %d, %d)\n", + __FUNCTION__, + cell->x, cell->covered_height, cell->uncovered_area)); + + cover += cell->covered_height; + cell = cell->next; + } + cover *= FAST_SAMPLES_X*2; + + box.y1 = y; + box.y2 = y + height; + box.x1 = xmin; + + /* Form the spans from the coverages and areas. */ + for (; cell != NULL; cell = cell->next) { + int x = cell->x; + + DBG(("%s: cell=(%d, %d, %d), cover=%d, max=%d\n", __FUNCTION__, + cell->x, cell->covered_height, cell->uncovered_area, + cover, xmax)); + + if (x >= xmax) + break; + + box.x2 = x; + if (box.x2 > box.x1 && (unbounded || cover)) + span(sna, op, clip, &box, cover); + box.x1 = box.x2; + + cover += cell->covered_height*FAST_SAMPLES_X*2; + + if (cell->uncovered_area) { + int area = cover - cell->uncovered_area; + box.x2 = x + 1; + if (unbounded || area) + span(sna, op, clip, &box, area); + box.x1 = box.x2; + } + } + + box.x2 = xmax; + if (box.x2 > box.x1 && (unbounded || cover)) + span(sna, op, clip, &box, cover); +} + +static void +tor_blt_empty(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + void (*span)(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage), + int y, int height, + int xmin, int xmax) +{ + BoxRec box; + + box.x1 = xmin; + box.x2 = xmax; + box.y1 = y; + box.y2 = y + height; + + span(sna, op, clip, &box, 0); +} + +static void +tor_render(struct sna *sna, + struct tor *converter, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + void (*span)(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage), + int unbounded) +{ + int ymin = converter->ymin; + int xmin = converter->xmin; + int xmax = converter->xmax; + int i, j, h = converter->ymax - ymin; + struct polygon *polygon = converter->polygon; + struct cell_list *coverages = converter->coverages; + struct active_list *active = converter->active; + + DBG(("%s: unbounded=%d\n", __FUNCTION__, unbounded)); + + /* Render each pixel row. */ + for (i = 0; i < h; i = j) { + int do_full_step = 0; + + j = i + 1; + + /* Determine if we can ignore this row or use the full pixel + * stepper. */ + if (!polygon->y_buckets[i]) { + if (!active->head) { + for (; j < h && !polygon->y_buckets[j]; j++) + ; + DBG(("%s: no new edges and no exisiting edges, skipping, %d -> %d\n", + __FUNCTION__, i, j)); + + if (unbounded) + tor_blt_empty(sna, op, clip, span, i+ymin, j-i, xmin, xmax); + continue; + } + + do_full_step = active_list_can_step_full_row(active); + } + + DBG(("%s: do_full_step=%d, new edges=%d\n", + __FUNCTION__, do_full_step, polygon->y_buckets[i] != NULL)); + if (do_full_step) { + /* Step by a full pixel row's worth. */ + apply_nonzero_fill_rule_and_step_edges(active, + coverages); + + if (active_list_is_vertical(active)) { + while (j < h && + polygon->y_buckets[j] == NULL && + active->min_height >= 2*FAST_SAMPLES_Y) + { + active->min_height -= FAST_SAMPLES_Y; + j++; + } + if (j != i + 1) + step_edges(active, j - (i + 1)); + + DBG(("%s: vertical edges, full step (%d, %d)\n", + __FUNCTION__, i, j)); + } + } else { + grid_scaled_y_t y = (i+ymin)*FAST_SAMPLES_Y; + grid_scaled_y_t suby; + + /* Subsample this row. */ + for (suby = 0; suby < FAST_SAMPLES_Y; suby++) { + if (polygon->y_buckets[i]) + merge_edges(active, + y + suby, + &polygon->y_buckets[i]); + + apply_nonzero_fill_rule_for_subrow(active, + coverages); + substep_edges(active); + } + } + + if (coverages->head != &coverages->tail) { + tor_blt(sna, op, clip, span, coverages, + i+ymin, j-i, xmin, xmax, + unbounded); + cell_list_reset(coverages); + } else if (unbounded) + tor_blt_empty(sna, op, clip, span, i+ymin, j-i, xmin, xmax); + + if (!active->head) + active->min_height = INT_MAX; + else + active->min_height -= FAST_SAMPLES_Y; + } +} + +static int operator_is_bounded(uint8_t op) +{ + switch (op) { + case PictOpOver: + case PictOpOutReverse: + case PictOpAdd: + return TRUE; + default: + return FALSE; + } +} + +static void +trapezoids_fallback(CARD8 op, PicturePtr src, PicturePtr dst, + PictFormatPtr maskFormat, INT16 xSrc, INT16 ySrc, + int ntrap, xTrapezoid * traps) +{ + ScreenPtr screen = dst->pDrawable->pScreen; + + if (maskFormat) { + PixmapPtr scratch; + PicturePtr mask; + INT16 dst_x, dst_y; + BoxRec bounds; + int width, height, depth; + pixman_image_t *image; + pixman_format_code_t format; + int error; + + dst_x = pixman_fixed_to_int(traps[0].left.p1.x); + dst_y = pixman_fixed_to_int(traps[0].left.p1.y); + + miTrapezoidBounds(ntrap, traps, &bounds); + if (bounds.y1 >= bounds.y2 || bounds.x1 >= bounds.x2) + return; + + DBG(("%s: bounds (%d, %d), (%d, %d)\n", + __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2)); + + if (!sna_compute_composite_extents(&bounds, + src, NULL, dst, + xSrc, ySrc, + 0, 0, + bounds.x1, bounds.y1, + bounds.x2 - bounds.x1, + bounds.y2 - bounds.y1)) + return; + + DBG(("%s: extents (%d, %d), (%d, %d)\n", + __FUNCTION__, bounds.x1, bounds.y1, bounds.x2, bounds.y2)); + + width = bounds.x2 - bounds.x1; + height = bounds.y2 - bounds.y1; + bounds.x1 -= dst->pDrawable->x; + bounds.y1 -= dst->pDrawable->y; + depth = maskFormat->depth; + format = maskFormat->format | (BitsPerPixel(depth) << 24); + + DBG(("%s: mask (%dx%d) depth=%d, format=%08x\n", + __FUNCTION__, width, height, depth, format)); + scratch = sna_pixmap_create_upload(screen, + width, height, depth); + if (!scratch) + return; + + memset(scratch->devPrivate.ptr, 0, scratch->devKind*height); + image = pixman_image_create_bits(format, width, height, + scratch->devPrivate.ptr, + scratch->devKind); + if (image) { + for (; ntrap; ntrap--, traps++) + pixman_rasterize_trapezoid(image, + (pixman_trapezoid_t *)traps, + -bounds.x1, -bounds.y1); + + pixman_image_unref(image); + } + + mask = CreatePicture(0, &scratch->drawable, + PictureMatchFormat(screen, depth, format), + 0, 0, serverClient, &error); + screen->DestroyPixmap(scratch); + if (!mask) + return; + + CompositePicture(op, src, mask, dst, + xSrc + bounds.x1 - dst_x, + ySrc + bounds.y1 - dst_y, + 0, 0, + bounds.x1, bounds.y1, + width, height); + FreePicture(mask, 0); + } else { + if (dst->polyEdge == PolyEdgeSharp) + maskFormat = PictureMatchFormat(screen, 1, PICT_a1); + else + maskFormat = PictureMatchFormat(screen, 8, PICT_a8); + + for (; ntrap; ntrap--, traps++) + trapezoids_fallback(op, + src, dst, maskFormat, + xSrc, ySrc, 1, traps); + } +} + +static Bool +composite_aligned_boxes(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr maskFormat, + INT16 src_x, INT16 src_y, + int ntrap, xTrapezoid *traps) +{ + BoxRec stack_boxes[64], *boxes, extents; + pixman_region16_t region, clip; + struct sna *sna; + struct sna_composite_op tmp; + Bool ret = true; + int dx, dy, n, num_boxes; + + DBG(("%s\n", __FUNCTION__)); + + boxes = stack_boxes; + if (ntrap > ARRAY_SIZE(stack_boxes)) + boxes = malloc(sizeof(BoxRec)*ntrap); + + dx = dst->pDrawable->x; + dy = dst->pDrawable->y; + + extents.x1 = extents.y1 = 32767; + extents.x2 = extents.y2 = -32767; + num_boxes = 0; + for (n = 0; n < ntrap; n++) { + boxes[num_boxes].x1 = dx + pixman_fixed_to_int(traps[n].left.p1.x + pixman_fixed_1_minus_e/2); + boxes[num_boxes].y1 = dy + pixman_fixed_to_int(traps[n].top + pixman_fixed_1_minus_e/2); + boxes[num_boxes].x2 = dx + pixman_fixed_to_int(traps[n].right.p2.x + pixman_fixed_1_minus_e/2); + boxes[num_boxes].y2 = dy + pixman_fixed_to_int(traps[n].bottom + pixman_fixed_1_minus_e/2); + + if (boxes[num_boxes].x1 >= boxes[num_boxes].x2) + continue; + if (boxes[num_boxes].y1 >= boxes[num_boxes].y2) + continue; + + if (boxes[num_boxes].x1 < extents.x1) + extents.x1 = boxes[num_boxes].x1; + if (boxes[num_boxes].x2 > extents.x2) + extents.x2 = boxes[num_boxes].x2; + + if (boxes[num_boxes].y1 < extents.y1) + extents.y1 = boxes[num_boxes].y1; + if (boxes[num_boxes].y2 > extents.y2) + extents.y2 = boxes[num_boxes].y2; + + num_boxes++; + } + + if (num_boxes == 0) + return true; + + DBG(("%s: extents (%d, %d), (%d, %d) offset of (%d, %d)\n", + __FUNCTION__, + extents.x1, extents.y1, + extents.x2, extents.y2, + extents.x1 - boxes[0].x1, + extents.y1 - boxes[0].y1)); + + src_x += extents.x1 - boxes[0].x1; + src_y += extents.y1 - boxes[0].y1; + + if (!sna_compute_composite_region(&clip, + src, NULL, dst, + src_x, src_y, + 0, 0, + extents.x1 - dx, extents.y1 - dy, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) { + DBG(("%s: trapezoids do not intersect drawable clips\n", + __FUNCTION__)) ; + goto done; + } + + memset(&tmp, 0, sizeof(tmp)); + sna = to_sna_from_drawable(dst->pDrawable); + if (!sna->render.composite(sna, op, src, NULL, dst, + src_x, src_y, + 0, 0, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + &tmp)) { + DBG(("%s: composite render op not supported\n", + __FUNCTION__)); + ret = false; + goto done; + } + + if (maskFormat || + (op == PictOpSrc || op == PictOpClear) || + num_boxes == 1) { + pixman_region_init_rects(®ion, boxes, num_boxes); + RegionIntersect(®ion, ®ion, &clip); + if (REGION_NUM_RECTS(®ion)) { + tmp.boxes(sna, &tmp, + REGION_RECTS(®ion), + REGION_NUM_RECTS(®ion)); + apply_damage(&tmp, ®ion); + } + pixman_region_fini(®ion); + } else { + for (n = 0; n < num_boxes; n++) { + pixman_region_init_rects(®ion, &boxes[n], 1); + RegionIntersect(®ion, ®ion, &clip); + if (REGION_NUM_RECTS(®ion)) { + tmp.boxes(sna, &tmp, + REGION_RECTS(®ion), + REGION_NUM_RECTS(®ion)); + apply_damage(&tmp, ®ion); + } + pixman_region_fini(®ion); + } + } + tmp.done(sna, &tmp); + +done: + REGION_UNINIT(NULL, &clip); + if (boxes != stack_boxes) + free(boxes); + + return ret; +} + +static inline int coverage(int samples, pixman_fixed_t f) +{ + return (samples * pixman_fixed_frac(f) + pixman_fixed_1/2) / pixman_fixed_1; +} + +static void +composite_unaligned_box(struct sna *sna, + struct sna_composite_spans_op *tmp, + const BoxRec *box, + float opacity, + pixman_region16_t *clip) +{ + pixman_region16_t region; + + pixman_region_init_rects(®ion, box, 1); + RegionIntersect(®ion, ®ion, clip); + if (REGION_NUM_RECTS(®ion)) { + tmp->boxes(sna, tmp, + REGION_RECTS(®ion), + REGION_NUM_RECTS(®ion), + opacity); + apply_damage(&tmp->base, ®ion); + } + pixman_region_fini(®ion); +} + +static void +composite_unaligned_trap_row(struct sna *sna, + struct sna_composite_spans_op *tmp, + xTrapezoid *trap, int dx, + int y1, int y2, int covered, + pixman_region16_t *clip) +{ + BoxRec box; + int opacity; + int x1, x2; + + if (covered == 0) + return; + + if (y2 > clip->extents.y2) + y2 = clip->extents.y2; + if (y1 < clip->extents.y1) + y1 = clip->extents.y1; + if (y1 >= y2) + return; + + x1 = dx + pixman_fixed_to_int(trap->left.p1.x); + x2 = dx + pixman_fixed_to_int(trap->right.p1.x); + if (x2 < clip->extents.x1 || x1 > clip->extents.x2) + return; + + box.y1 = y1; + box.y2 = y2; + + if (x1 == x2) { + box.x1 = x1; + box.x2 = x2 + 1; + + opacity = covered; + opacity *= coverage(SAMPLES_X, trap->right.p1.x) - coverage(SAMPLES_X, trap->left.p1.x); + + if (opacity) + composite_unaligned_box(sna, tmp, &box, + opacity/255., clip); + } else { + if (pixman_fixed_frac(trap->left.p1.x)) { + box.x1 = x1; + box.x2 = x1++; + + opacity = covered; + opacity *= SAMPLES_X - coverage(SAMPLES_X, trap->left.p1.x); + + if (opacity) + composite_unaligned_box(sna, tmp, &box, + opacity/255., clip); + } + + if (x2 > x1) { + box.x1 = x1; + box.x2 = x2; + + composite_unaligned_box(sna, tmp, &box, + covered*SAMPLES_X/255., clip); + } + + if (pixman_fixed_frac(trap->right.p1.x)) { + box.x1 = x2; + box.x2 = x2 + 1; + + opacity = covered; + opacity *= coverage(SAMPLES_X, trap->right.p1.x); + + if (opacity) + composite_unaligned_box(sna, tmp, &box, + opacity/255., clip); + } + } +} + +static void +composite_unaligned_trap(struct sna *sna, + struct sna_composite_spans_op *tmp, + xTrapezoid *trap, + int dx, int dy, + pixman_region16_t *clip) +{ + int y1, y2; + + y1 = dy + pixman_fixed_to_int(trap->top); + y2 = dy + pixman_fixed_to_int(trap->bottom); + + if (y1 == y2) { + composite_unaligned_trap_row(sna, tmp, trap, dx, + y1, y1 + 1, + coverage(SAMPLES_Y, trap->bottom) - coverage(SAMPLES_Y, trap->top), + clip); + } else { + if (pixman_fixed_frac(trap->top)) { + composite_unaligned_trap_row(sna, tmp, trap, dx, + y1, y1 + 1, + SAMPLES_Y - coverage(SAMPLES_Y, trap->top), + clip); + y1++; + } + + if (y2 > y1) + composite_unaligned_trap_row(sna, tmp, trap, dx, + y1, y2, + SAMPLES_Y, + clip); + + if (pixman_fixed_frac(trap->bottom)) + composite_unaligned_trap_row(sna, tmp, trap, dx, + y2, y2 + 1, + coverage(SAMPLES_Y, trap->bottom), + clip); + } +} + +inline static void +blt_opacity(PixmapPtr scratch, + int x1, int x2, + int y, int h, + uint8_t opacity) +{ + uint8_t *ptr; + + if (opacity == 0xff) + return; + + if (x1 < 0) + x1 = 0; + if (x2 > scratch->drawable.width) + x2 = scratch->drawable.width; + if (x1 >= x2) + return; + + x2 -= x1; + + ptr = scratch->devPrivate.ptr; + ptr += scratch->devKind * y; + ptr += x1; + do { + if (x2 == 1) + *ptr = opacity; + else + memset(ptr, opacity, x2); + ptr += scratch->devKind; + } while (--h); +} + +static void +blt_unaligned_box_row(PixmapPtr scratch, + BoxPtr extents, + xTrapezoid *trap, + int y1, int y2, + int covered) +{ + int x1, x2; + + if (y2 > scratch->drawable.height) + y2 = scratch->drawable.height; + if (y1 < 0) + y1 = 0; + if (y1 >= y2) + return; + + y2 -= y1; + + x1 = pixman_fixed_to_int(trap->left.p1.x); + x2 = pixman_fixed_to_int(trap->right.p1.x); + + x1 -= extents->x1; + x2 -= extents->x1; + + if (x1 == x2) { + blt_opacity(scratch, + x1, x1+1, + y1, y2, + covered * (coverage(SAMPLES_X, trap->right.p1.x) - coverage(SAMPLES_X, trap->left.p1.x))); + } else { + if (pixman_fixed_frac(trap->left.p1.x)) + blt_opacity(scratch, + x1, x1+1, + y1, y2, + covered * (SAMPLES_X - coverage(SAMPLES_X, trap->left.p1.x))); + + if (x2 > x1 + 1) { + blt_opacity(scratch, + x1 + 1, x2, + y1, y2, + covered*SAMPLES_X); + } + + if (pixman_fixed_frac(trap->right.p1.x)) + blt_opacity(scratch, + x2, x2 + 1, + y1, y2, + covered * coverage(SAMPLES_X, trap->right.p1.x)); + } +} + +static Bool +composite_unaligned_boxes_fallback(CARD8 op, + PicturePtr src, + PicturePtr dst, + INT16 src_x, INT16 src_y, + int ntrap, xTrapezoid *traps) +{ + ScreenPtr screen = dst->pDrawable->pScreen; + INT16 dst_x = pixman_fixed_to_int(traps[0].left.p1.x); + INT16 dst_y = pixman_fixed_to_int(traps[0].left.p1.y); + int dx = dst->pDrawable->x; + int dy = dst->pDrawable->y; + int n; + + for (n = 0; n < ntrap; n++) { + xTrapezoid *t = &traps[n]; + PixmapPtr scratch; + PicturePtr mask; + BoxRec extents; + int error; + int y1, y2; + + extents.x1 = pixman_fixed_to_int(t->left.p1.x); + extents.x2 = pixman_fixed_to_int(t->right.p1.x + pixman_fixed_1_minus_e); + extents.y1 = pixman_fixed_to_int(t->top); + extents.y2 = pixman_fixed_to_int(t->bottom + pixman_fixed_1_minus_e); + + if (!sna_compute_composite_extents(&extents, + src, NULL, dst, + src_x, src_y, + 0, 0, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) + continue; + + scratch = sna_pixmap_create_upload(screen, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + 8); + if (!scratch) + continue; + + memset(scratch->devPrivate.ptr, 0xff, + scratch->devKind * (extents.y2 - extents.y1)); + + extents.x1 -= dx; + extents.x2 -= dx; + extents.y1 -= dy; + extents.y2 -= dy; + + y1 = pixman_fixed_to_int(t->top) - extents.y1; + y2 = pixman_fixed_to_int(t->bottom) - extents.y1; + + if (y1 == y2) { + blt_unaligned_box_row(scratch, &extents, t, y1, y1 + 1, + coverage(SAMPLES_Y, t->bottom) - coverage(SAMPLES_Y, t->top)); + } else { + if (pixman_fixed_frac(t->top)) + blt_unaligned_box_row(scratch, &extents, t, y1, y1 + 1, + SAMPLES_Y - coverage(SAMPLES_Y, t->top)); + + if (y2 > y1 + 1) + blt_unaligned_box_row(scratch, &extents, t, y1+1, y2, + SAMPLES_Y); + + if (pixman_fixed_frac(t->bottom)) + blt_unaligned_box_row(scratch, &extents, t, y2, y2+1, + coverage(SAMPLES_Y, t->bottom)); + } + + mask = CreatePicture(0, &scratch->drawable, + PictureMatchFormat(screen, 8, PICT_a8), + 0, 0, serverClient, &error); + screen->DestroyPixmap(scratch); + if (mask) { + CompositePicture(op, src, mask, dst, + src_x + extents.x1 - dst_x, + src_y + extents.y1 - dst_y, + 0, 0, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1); + FreePicture(mask, 0); + } + } + + return TRUE; +} + +static Bool +composite_unaligned_boxes(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr maskFormat, + INT16 src_x, INT16 src_y, + int ntrap, xTrapezoid *traps) +{ + struct sna *sna; + BoxRec extents; + struct sna_composite_spans_op tmp; + pixman_region16_t clip; + int dst_x, dst_y; + int dx, dy, n; + + DBG(("%s\n", __FUNCTION__)); + + /* XXX need a span converter to handle overlapping traps */ + if (ntrap > 1 && maskFormat) + return false; + + sna = to_sna_from_drawable(dst->pDrawable); + if (!sna->render.composite_spans) + return composite_unaligned_boxes_fallback(op, src, dst, src_x, src_y, ntrap, traps); + + dst_x = extents.x1 = pixman_fixed_to_int(traps[0].left.p1.x); + extents.x2 = pixman_fixed_to_int(traps[0].right.p1.x + pixman_fixed_1_minus_e); + dst_y = extents.y1 = pixman_fixed_to_int(traps[0].top); + extents.y2 = pixman_fixed_to_int(traps[0].bottom + pixman_fixed_1_minus_e); + + DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", + __FUNCTION__, src_x, src_y, dst_x, dst_y)); + + for (n = 1; n < ntrap; n++) { + int x1 = pixman_fixed_to_int(traps[n].left.p1.x); + int x2 = pixman_fixed_to_int(traps[n].right.p1.x + pixman_fixed_1_minus_e); + int y1 = pixman_fixed_to_int(traps[n].top); + int y2 = pixman_fixed_to_int(traps[n].bottom + pixman_fixed_1_minus_e); + + if (x1 < extents.x1) + extents.x1 = x1; + if (x2 > extents.x2) + extents.x2 = x2; + if (y1 < extents.y1) + extents.y1 = y1; + if (y2 > extents.y2) + extents.y2 = y2; + } + + DBG(("%s: extents (%d, %d), (%d, %d)\n", __FUNCTION__, + extents.x1, extents.y1, extents.x2, extents.y2)); + + if (!sna_compute_composite_region(&clip, + src, NULL, dst, + src_x + extents.x1 - dst_x, + src_y + extents.y1 - dst_y, + 0, 0, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) { + DBG(("%s: trapezoids do not intersect drawable clips\n", + __FUNCTION__)) ; + return true; + } + + extents = *RegionExtents(&clip); + dx = dst->pDrawable->x; + dy = dst->pDrawable->y; + + DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n", + __FUNCTION__, + extents.x1, extents.y1, + extents.x2, extents.y2, + dx, dy, + src_x + extents.x1 - dst_x - dx, + src_y + extents.y1 - dst_y - dy)); + + memset(&tmp, 0, sizeof(tmp)); + if (!sna->render.composite_spans(sna, op, src, dst, + src_x + extents.x1 - dst_x - dx, + src_y + extents.y1 - dst_y - dy, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + &tmp)) { + DBG(("%s: composite spans render op not supported\n", + __FUNCTION__)); + return false; + } + + for (n = 0; n < ntrap; n++) + composite_unaligned_trap(sna, &tmp, &traps[n], dx, dy, &clip); + tmp.done(sna, &tmp); + + REGION_UNINIT(NULL, &clip); + return true; +} + +static bool +tor_scan_converter(CARD8 op, PicturePtr src, PicturePtr dst, + PictFormatPtr maskFormat, INT16 src_x, INT16 src_y, + int ntrap, xTrapezoid *traps) +{ + struct sna *sna; + struct sna_composite_spans_op tmp; + struct tor tor; + void (*span)(struct sna *sna, + struct sna_composite_spans_op *op, + pixman_region16_t *clip, + const BoxRec *box, + int coverage); + BoxRec extents; + pixman_region16_t clip; + int16_t dst_x, dst_y; + int16_t dx, dy; + int n; + + /* XXX strict adhernce to the Reneder specification */ + if (dst->polyMode == PolyModePrecise) { + DBG(("%s: fallback -- precise rasterisation requested\n", + __FUNCTION__)); + return false; + } + + sna = to_sna_from_drawable(dst->pDrawable); + if (!sna->render.composite_spans) { + DBG(("%s: fallback -- composite spans not supported\n", + __FUNCTION__)); + return false; + } + + dst_x = pixman_fixed_to_int(traps[0].left.p1.x); + dst_y = pixman_fixed_to_int(traps[0].left.p1.y); + + miTrapezoidBounds(ntrap, traps, &extents); + if (extents.y1 >= extents.y2 || extents.x1 >= extents.x2) + return true; + + DBG(("%s: extents (%d, %d), (%d, %d)\n", + __FUNCTION__, extents.x1, extents.y1, extents.x2, extents.y2)); + + if (!sna_compute_composite_region(&clip, + src, NULL, dst, + src_x + extents.x1 - dst_x, + src_y + extents.y1 - dst_y, + 0, 0, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1)) { + DBG(("%s: trapezoids do not intersect drawable clips\n", + __FUNCTION__)) ; + return true; + } + + extents = *RegionExtents(&clip); + dx = dst->pDrawable->x; + dy = dst->pDrawable->y; + + DBG(("%s: after clip -- extents (%d, %d), (%d, %d), delta=(%d, %d) src -> (%d, %d)\n", + __FUNCTION__, + extents.x1, extents.y1, + extents.x2, extents.y2, + dx, dy, + src_x + extents.x1 - dst_x - dx, + src_y + extents.y1 - dst_y - dy)); + + memset(&tmp, 0, sizeof(tmp)); + if (!sna->render.composite_spans(sna, op, src, dst, + src_x + extents.x1 - dst_x - dx, + src_y + extents.y1 - dst_y - dy, + extents.x1, extents.y1, + extents.x2 - extents.x1, + extents.y2 - extents.y1, + &tmp)) { + DBG(("%s: fallback -- composite spans render op not supported\n", + __FUNCTION__)); + return false; + } + + dx *= FAST_SAMPLES_X; + dy *= FAST_SAMPLES_Y; + if (tor_init(&tor, &extents, 2*ntrap)) + goto skip; + + for (n = 0; n < ntrap; n++) { + int top, bottom; + + if (!xTrapezoidValid(&traps[n])) + continue; + + if (pixman_fixed_to_int(traps[n].top) + dst->pDrawable->y >= extents.y2 || + pixman_fixed_to_int(traps[n].bottom) + dst->pDrawable->y < extents.y1) + continue; + + top = dy + (traps[n].top >> (16 - FAST_SAMPLES_Y_shift)); + bottom = dy + (traps[n].bottom >> (16 - FAST_SAMPLES_Y_shift)); + if (top >= bottom) + continue; + + tor_add_edge(&tor, dx, dy, top, bottom, &traps[n].left, 1); + tor_add_edge(&tor, dx, dy, top, bottom, &traps[n].right, -1); + } + + if (maskFormat ? maskFormat->depth < 8 : dst->polyEdge == PolyEdgeSharp) { + /* XXX An imprecise approximation */ + if (maskFormat && !operator_is_bounded(op)) { + span = tor_blt_span_mono_unbounded; + if (REGION_NUM_RECTS(&clip) > 1) + span = tor_blt_span_mono_unbounded_clipped; + } else { + span = tor_blt_span_mono; + if (REGION_NUM_RECTS(&clip) > 1) + span = tor_blt_span_mono_clipped; + } + } else { + span = tor_blt_span; + if (REGION_NUM_RECTS(&clip) > 1) + span = tor_blt_span_clipped; + } + + tor_render(sna, &tor, &tmp, &clip, span, + maskFormat && !operator_is_bounded(op)); + +skip: + tor_fini(&tor); + tmp.done(sna, &tmp); + + REGION_UNINIT(NULL, &clip); + return true; +} + +void +sna_composite_trapezoids(CARD8 op, + PicturePtr src, + PicturePtr dst, + PictFormatPtr maskFormat, + INT16 xSrc, INT16 ySrc, + int ntrap, xTrapezoid *traps) +{ + struct sna *sna = to_sna_from_drawable(dst->pDrawable); + bool rectilinear = true; + bool pixel_aligned = true; + int n; + + DBG(("%s(op=%d, src=(%d, %d), mask=%08x, ntrap=%d)\n", __FUNCTION__, + op, xSrc, ySrc, + maskFormat ? (int)maskFormat->format : 0, + ntrap)); + + if (ntrap == 0) + return; + + if (NO_ACCEL) + goto fallback; + + if (sna->kgem.wedged || !sna->have_render) { + DBG(("%s: fallback -- wedged=%d, have_render=%d\n", + __FUNCTION__, sna->kgem.wedged, sna->have_render)); + goto fallback; + } + + if (dst->alphaMap || src->alphaMap) { + DBG(("%s: fallback -- alpha maps=(dst=%p, src=%p)\n", + __FUNCTION__, dst->alphaMap, src->alphaMap)); + goto fallback; + } + + if (too_small(sna, dst->pDrawable) && !picture_is_gpu(src)) { + DBG(("%s: fallback -- dst is too small, %dx%d\n", + __FUNCTION__, + dst->pDrawable->width, + dst->pDrawable->height)); + goto fallback; + } + + /* scan through for fast rectangles */ + for (n = 0; n < ntrap && rectilinear; n++) { + rectilinear &= + traps[n].left.p1.x == traps[n].left.p2.x && + traps[n].right.p1.x == traps[n].right.p2.x; + pixel_aligned &= + ((traps[n].top | traps[n].bottom | + traps[n].left.p1.x | traps[n].left.p2.x | + traps[n].right.p1.x | traps[n].right.p2.x) + & pixman_fixed_1_minus_e) == 0; + } + + if (rectilinear) { + pixel_aligned |= maskFormat ? + maskFormat->depth == 1 : + dst->polyEdge == PolyEdgeSharp; + if (pixel_aligned) { + if (composite_aligned_boxes(op, src, dst, + maskFormat, + xSrc, ySrc, + ntrap, traps)) + return; + } else { + if (composite_unaligned_boxes(op, src, dst, + maskFormat, + xSrc, ySrc, + ntrap, traps)) + return; + } + } + + if (tor_scan_converter(op, src, dst, maskFormat, + xSrc, ySrc, ntrap, traps)) + return; + +fallback: + DBG(("%s: fallback mask=%08x, ntrap=%d\n", __FUNCTION__, + maskFormat ? (unsigned)maskFormat->format : 0, ntrap)); + trapezoids_fallback(op, src, dst, maskFormat, + xSrc, ySrc, + ntrap, traps); +} |