diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2011-04-08 07:17:14 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2011-06-04 09:19:46 +0100 |
commit | bcef98af561939aa48d9236b2dfa2c5626adf4cb (patch) | |
tree | 9d05558947a97595a6fdece968b50eeae45bbfb1 /src/sna/sna_render.h | |
parent | 340cfb7f5271fd1df4c8948e5c9336f5b69a6e6c (diff) |
sna: Introduce a new acceleration model.
The premise is that switching between rings (i.e. the BLT and
RENDER rings) on SandyBridge imposes a large latency overhead whilst
rendering. The cause is that in order to switch rings, we need to split
the batch earlier than is desired and to add serialisation between the
rings. Both of which incur large overhead.
By switching to using a pure 3D blit engine (ok, not so pure as the BLT
engine still has uses for the core drawing model which can not be easily
represented without a combinatorial explosion of shaders) we can take
advantage of additional efficiencies, such as relative relocations, that
have been incorporated into recent hardware advances. However, even
older hardware performs better from avoiding the implicit context
switches and from the batching efficiency of the 3D pipeline...
But this is X, and PolyGlyphBlt still exists and remains in use. So for
the operations that are not worth accelerating in hardware, we introduce a
shadow buffer mechanism through out and reintroduce pixmap migration.
Doing this efficiently is the cornerstone of ensuring that we do exploit
the increased potential of recent hardware for running old applications and
environments (i.e. so that the latest and greatest chip is actually faster
than gen2!)
For the curious, sna is SandyBridge's New Acceleration. If you are
running older chipsets and welcome the performance increase offered by
this patch, then you may choose to call it Snazzy instead.
Speedups
========
gen3 firefox-fishtank 1203584.56 (1203842.75 0.01%) -> 85561.71 (125146.44 14.87%): 14.07x speedup
gen5 grads-heat-map 3385.42 (3489.73 1.44%) -> 350.29 (350.75 0.18%): 9.66x speedup
gen3 xfce4-terminal-a1 4179.02 (4180.09 0.06%) -> 503.90 (531.88 4.48%): 8.29x speedup
gen4 grads-heat-map 2458.66 (2826.34 4.64%) -> 348.82 (349.20 0.29%): 7.05x speedup
gen3 grads-heat-map 1443.33 (1445.32 0.09%) -> 298.55 (298.76 0.05%): 4.83x speedup
gen3 swfdec-youtube 3836.14 (3894.14 0.95%) -> 889.84 (979.56 5.99%): 4.31x speedup
gen6 grads-heat-map 742.11 (744.44 0.15%) -> 172.51 (172.93 0.20%): 4.30x speedup
gen3 firefox-talos-svg 71740.44 (72370.13 0.59%) -> 21959.29 (21995.09 0.68%): 3.27x speedup
gen5 gvim 8045.51 (8071.47 0.17%) -> 2589.38 (3246.78 10.74%): 3.11x speedup
gen6 poppler 3800.78 (3817.92 0.24%) -> 1227.36 (1230.12 0.30%): 3.10x speedup
gen6 gnome-terminal-vim 9106.84 (9111.56 0.03%) -> 3459.49 (3478.52 0.25%): 2.63x speedup
gen5 midori-zoomed 9564.53 (9586.58 0.17%) -> 3677.73 (3837.02 2.02%): 2.60x speedup
gen5 gnome-terminal-vim 38167.25 (38215.82 0.08%) -> 14901.09 (14902.28 0.01%): 2.56x speedup
gen5 poppler 13575.66 (13605.04 0.16%) -> 5554.27 (5555.84 0.01%): 2.44x speedup
gen5 swfdec-giant-steps 8941.61 (8988.72 0.52%) -> 3851.98 (3871.01 0.93%): 2.32x speedup
gen5 xfce4-terminal-a1 18956.60 (18986.90 0.07%) -> 8362.75 (8365.70 0.01%): 2.27x speedup
gen5 firefox-fishtank 88750.31 (88858.23 0.14%) -> 39164.57 (39835.54 0.80%): 2.27x speedup
gen3 midori-zoomed 2392.13 (2397.82 0.14%) -> 1109.96 (1303.10 30.35%): 2.16x speedup
gen6 gvim 2510.34 (2513.34 0.20%) -> 1200.76 (1204.30 0.22%): 2.09x speedup
gen5 firefox-planet-gnome 40478.16 (40565.68 0.09%) -> 19606.22 (19648.79 0.16%): 2.06x speedup
gen5 gnome-system-monitor 10344.47 (10385.62 0.29%) -> 5136.69 (5256.85 1.15%): 2.01x speedup
gen3 poppler 2595.23 (2603.10 0.17%) -> 1297.56 (1302.42 0.61%): 2.00x speedup
gen6 firefox-talos-gfx 7184.03 (7194.97 0.13%) -> 3806.31 (3811.66 0.06%): 1.89x speedup
gen5 evolution 8739.25 (8766.12 0.27%) -> 4817.54 (5050.96 1.54%): 1.81x speedup
gen3 evolution 1684.06 (1696.88 0.35%) -> 1004.99 (1008.55 0.85%): 1.68x speedup
gen3 gnome-terminal-vim 4285.13 (4287.68 0.04%) -> 2715.97 (3202.17 13.52%): 1.58x speedup
gen5 swfdec-youtube 5843.94 (5951.07 0.91%) -> 3810.86 (3826.04 1.32%): 1.53x speedup
gen4 poppler 7496.72 (7558.83 0.58%) -> 5125.08 (5247.65 1.44%): 1.46x speedup
gen4 gnome-terminal-vim 21126.24 (21292.08 0.85%) -> 14590.25 (15066.33 1.80%): 1.45x speedup
gen5 firefox-talos-svg 99873.69 (100300.95 0.37%) -> 70745.66 (70818.86 0.05%): 1.41x speedup
gen4 firefox-planet-gnome 28205.10 (28304.45 0.27%) -> 19996.11 (20081.44 0.56%): 1.41x speedup
gen5 firefox-talos-gfx 93070.85 (93194.72 0.10%) -> 67687.93 (70374.37 1.30%): 1.37x speedup
gen4 evolution 6696.25 (6854.14 0.85%) -> 4958.62 (5027.73 0.85%): 1.35x speedup
gen3 swfdec-giant-steps 2538.03 (2539.30 0.04%) -> 1895.71 (2050.62 62.43%): 1.34x speedup
gen4 gvim 4356.18 (4422.78 0.70%) -> 3276.31 (3281.69 0.13%): 1.33x speedup
gen6 evolution 1242.13 (1245.44 0.72%) -> 953.76 (954.54 0.07%): 1.30x speedup
gen6 firefox-planet-gnome 4554.23 (4560.69 0.08%) -> 3758.76 (3768.97 0.28%): 1.21x speedup
gen3 firefox-talos-gfx 6264.13 (6284.65 0.30%) -> 5261.56 (5370.87 1.28%): 1.19x speedup
gen4 midori-zoomed 4771.13 (4809.90 0.73%) -> 4037.03 (4118.93 0.85%): 1.18x speedup
gen6 swfdec-giant-steps 1557.06 (1560.13 0.12%) -> 1336.34 (1341.29 0.32%): 1.17x speedup
gen4 firefox-talos-gfx 80767.28 (80986.31 0.17%) -> 69629.08 (69721.71 0.06%): 1.16x speedup
gen6 midori-zoomed 1463.70 (1463.76 0.08%) -> 1331.45 (1336.56 0.22%): 1.10x speedup
Slowdowns
=========
gen6 xfce4-terminal-a1 2030.25 (2036.23 0.25%) -> 2144.60 (2240.31 4.29%): 1.06x slowdown
gen4 swfdec-youtube 3580.00 (3597.23 3.92%) -> 3826.90 (3862.24 0.91%): 1.07x slowdown
gen4 firefox-talos-svg 66112.25 (66256.51 0.11%) -> 71433.40 (71584.31 0.14%): 1.08x slowdown
gen4 gnome-system-monitor 5691.60 (5724.03 0.56%) -> 6707.56 (6747.83 0.33%): 1.18x slowdown
gen3 ocitysmap 3494.05 (3502.44 0.20%) -> 4321.99 (4524.42 2.78%): 1.24x slowdown
gen4 ocitysmap 3628.42 (3641.66 9.37%) -> 5177.16 (5828.74 8.38%): 1.43x slowdown
gen5 ocitysmap 4027.77 (4068.11 0.80%) -> 5748.26 (6282.25 7.38%): 1.43x slowdown
gen6 ocitysmap 1401.61 (1402.24 0.40%) -> 2365.74 (2379.14 4.12%): 1.69x slowdown
[Note the performance regression for ocitysmap comes from that we now
attempt to support rendering to and (more importantly) from large
surfaces. By enabling such operations is the only way to one day be
faster than purely using the CPU, in the meantime we suffer regression
due to the increased migration and aperture thrashing. The other couple
of regressions will be eliminated with improved span and shader support,
now that the framework for such is in place.]
The performance increase for Cairo completely overlooks the other
critical aspects of the architecture:
World of Padman:
gen3 (800x600): 57.5 -> 96.2
gen4 (800x600): 47.8 -> 74.6
gen6 (1366x768): 100.4 -> 140.3 [F15]
144.3 -> 146.4 [drm-intel-next]
x11perf (gen6);
aa10text: 3.47 -> 14.3 Mglyphs/s [unthrottled!]
copywinwin10: 1.66 -> 1.99 Mops/s
copywinpix10: 2.28 -> 2.98 Mops/s
And we do not have a good measure for how much improvement the reworking
of the fallback paths give, except that xterm is now over 4x faster...
PS: This depends upon the Xorg patchset "Remove the cacheing of the last
scratch PixmapRec" for correct invalidations of scratch Pixmaps (used by
the dix to implement SHM operations, used by chromium and gtk+ pixbufs.
PPS: ./configure --enable-sna
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src/sna/sna_render.h')
-rw-r--r-- | src/sna/sna_render.h | 511 |
1 files changed, 511 insertions, 0 deletions
diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h new file mode 100644 index 00000000..328eaf78 --- /dev/null +++ b/src/sna/sna_render.h @@ -0,0 +1,511 @@ +#ifndef SNA_RENDER_H +#define SNA_RENDER_H + +#define GRADIENT_CACHE_SIZE 16 + +#define fastcall __attribute__((regparm(3))) + +struct sna; +struct sna_glyph; +struct sna_video; +struct sna_video_frame; + +struct sna_composite_rectangles { + struct sna_coordinate { + int16_t x, y; + } src, mask, dst; + int16_t width, height; +}; + +struct sna_composite_op { + fastcall void (*blt)(struct sna *sna, const struct sna_composite_op *op, + const struct sna_composite_rectangles *r); + void (*boxes)(struct sna *sna, const struct sna_composite_op *op, + const BoxRec *box, int nbox); + void (*done)(struct sna *sna, const struct sna_composite_op *op); + + struct sna_damage **damage; + + uint32_t op; + + struct { + PixmapPtr pixmap; + CARD32 format; + struct kgem_bo *bo; + int16_t x, y; + uint16_t width, height; + } dst; + + struct sna_composite_channel { + struct kgem_bo *bo; + PictTransform *transform; + uint16_t width; + uint16_t height; + uint32_t pict_format; + uint32_t card_format; + uint32_t filter; + uint32_t repeat; + uint32_t is_affine : 1; + uint32_t is_solid : 1; + uint32_t is_opaque : 1; + uint32_t alpha_fixup : 1; + uint32_t rb_reversed : 1; + int16_t offset[2]; + float scale[2]; + + struct gen3_shader_channel { + int type; + uint32_t mode; + uint32_t constants; + } gen3; + } src, mask; + uint32_t is_affine : 1; + uint32_t has_component_alpha : 1; + uint32_t need_magic_ca_pass : 1; + uint32_t rb_reversed : 1; + + int floats_per_vertex; + fastcall void (*prim_emit)(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r); + + struct sna_composite_redirect { + struct kgem_bo *real_bo; + BoxRec box; + } redirect; + + union { + struct sna_blt_state { + PixmapPtr src_pixmap; + int16_t sx, sy; + + uint32_t inplace :1; + uint32_t overwrites:1; + + int hdr; + uint32_t cmd; + uint32_t br13; + uint32_t pitch[2]; + uint32_t pixel; + struct kgem_bo *bo[3]; + } blt; + + struct { + int nothing; + } gen2; + + struct { + float constants[8]; + uint32_t num_constants; + } gen3; + + struct { + int wm_kernel; + int ve_id; + } gen4; + + struct { + int wm_kernel; + int ve_id; + } gen5; + + struct { + int wm_kernel; + int nr_surfaces; + int nr_inputs; + int ve_id; + } gen6; + + void *priv; + } u; +}; + +struct sna_composite_spans_op { + struct sna_composite_op base; + + void (*boxes)(struct sna *sna, const struct sna_composite_spans_op *op, + const BoxRec *box, int nbox, + float opacity); + void (*done)(struct sna *sna, const struct sna_composite_spans_op *op); + + void (*prim_emit)(struct sna *sna, + const struct sna_composite_spans_op *op, + const BoxRec *box, + float opacity); +}; + +struct sna_fill_op { + struct sna_composite_op base; + + void (*blt)(struct sna *sna, const struct sna_fill_op *op, + int16_t x, int16_t y, int16_t w, int16_t h); + void (*done)(struct sna *sna, const struct sna_fill_op *op); +}; + +struct sna_copy_op { + struct sna_composite_op base; + + void (*blt)(struct sna *sna, const struct sna_copy_op *op, + int16_t sx, int16_t sy, + int16_t w, int16_t h, + int16_t dx, int16_t dy); + void (*done)(struct sna *sna, const struct sna_copy_op *op); +}; + +struct sna_render { + int max_3d_size; + + Bool (*composite)(struct sna *sna, uint8_t op, + PicturePtr dst, PicturePtr src, PicturePtr mask, + int16_t src_x, int16_t src_y, + int16_t msk_x, int16_t msk_y, + int16_t dst_x, int16_t dst_y, + int16_t w, int16_t h, + struct sna_composite_op *tmp); + + Bool (*composite_spans)(struct sna *sna, uint8_t op, + PicturePtr dst, PicturePtr src, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t w, int16_t h, + struct sna_composite_spans_op *tmp); + + Bool (*video)(struct sna *sna, + struct sna_video *video, + struct sna_video_frame *frame, + RegionPtr dstRegion, + short src_w, short src_h, + short drw_w, short drw_h, + PixmapPtr pixmap); + + Bool (*fill_boxes)(struct sna *sna, + CARD8 op, + PictFormat format, + const xRenderColor *color, + PixmapPtr dst, struct kgem_bo *dst_bo, + const BoxRec *box, int n); + Bool (*fill)(struct sna *sna, uint8_t alu, + PixmapPtr dst, struct kgem_bo *dst_bo, + uint32_t color, + struct sna_fill_op *tmp); + + Bool (*copy_boxes)(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n); + Bool (*copy)(struct sna *sna, uint8_t alu, + PixmapPtr src, struct kgem_bo *src_bo, + PixmapPtr dst, struct kgem_bo *dst_bo, + struct sna_copy_op *op); + + void (*flush)(struct sna *sna); + void (*reset)(struct sna *sna); + void (*context_switch)(struct sna *sna, int new_mode); + void (*fini)(struct sna *sna); + + struct sna_solid_cache { + struct kgem_bo *cache_bo; + uint32_t color[1024]; + struct kgem_bo *bo[1024]; + int last; + int size; + int dirty; + } solid_cache; + + struct { + struct sna_gradient_cache { + struct kgem_bo *bo; + int nstops; + PictGradientStop *stops; + } cache[GRADIENT_CACHE_SIZE]; + int size; + } gradient_cache; + + struct sna_glyph_cache{ + PicturePtr picture; + struct sna_glyph **glyphs; + uint16_t count; + uint16_t evict; + } glyph[2]; + + uint16_t vertex_start; + uint16_t vertex_index; + uint16_t vertex_used; + uint16_t vertex_reloc[8]; + + float vertex_data[16*1024]; + const struct sna_composite_op *op; +}; + +struct gen2_render_state { + Bool need_invariant; + uint16_t vertex_offset; +}; + +struct gen3_render_state { + uint32_t current_dst; + Bool need_invariant; + uint32_t tex_count; + uint32_t last_drawrect_limit; + uint32_t last_target; + uint32_t last_blend; + uint32_t last_constants; + uint32_t last_sampler; + uint32_t last_shader; + uint32_t last_diffuse; + uint32_t last_specular; + + uint16_t vertex_offset; + uint16_t last_vertex_offset; + uint16_t floats_per_vertex; + uint16_t last_floats_per_vertex; + + uint32_t tex_map[4]; + uint32_t tex_handle[2]; + uint32_t tex_delta[2]; +}; + +struct gen4_render_state { + struct kgem_bo *general_bo; + + uint32_t vs; + uint32_t sf[2]; + uint32_t wm; + uint32_t cc; + + int ve_id; + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t vb_id; + uint16_t vertex_offset; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + uint16_t last_pipelined_pointers; + + Bool needs_invariant; +}; + +struct gen5_render_state { + struct kgem_bo *general_bo; + + uint32_t vs; + uint32_t sf[2]; + uint32_t wm; + uint32_t cc; + + int ve_id; + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t vb_id; + uint16_t vertex_offset; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + uint16_t last_pipelined_pointers; + + Bool needs_invariant; +}; + +enum { + GEN6_WM_KERNEL_NOMASK = 0, + GEN6_WM_KERNEL_NOMASK_PROJECTIVE, + + GEN6_WM_KERNEL_MASK, + GEN6_WM_KERNEL_MASK_PROJECTIVE, + + GEN6_WM_KERNEL_MASKCA, + GEN6_WM_KERNEL_MASKCA_PROJECTIVE, + + GEN6_WM_KERNEL_MASKCA_SRCALPHA, + GEN6_WM_KERNEL_MASKCA_SRCALPHA_PROJECTIVE, + + GEN6_WM_KERNEL_VIDEO_PLANAR, + GEN6_WM_KERNEL_VIDEO_PACKED, + GEN6_KERNEL_COUNT +}; + +struct gen6_render_state { + struct kgem_bo *general_bo; + + uint32_t vs_state; + uint32_t sf_state; + uint32_t sf_mask_state; + uint32_t wm_state; + uint32_t wm_kernel[GEN6_KERNEL_COUNT]; + + uint32_t cc_vp; + uint32_t cc_blend; + + uint32_t drawrect_offset; + uint32_t drawrect_limit; + uint32_t blend; + uint32_t samplers; + uint32_t kernel; + + uint16_t num_sf_outputs; + uint16_t vb_id; + uint16_t ve_id; + uint16_t vertex_offset; + uint16_t last_primitive; + int16_t floats_per_vertex; + uint16_t surface_table; + + Bool needs_invariant; +}; + +struct sna_static_stream { + uint32_t size, used; + uint8_t *data; +}; + +int sna_static_stream_init(struct sna_static_stream *stream); +uint32_t sna_static_stream_add(struct sna_static_stream *stream, + const void *data, uint32_t len, uint32_t align); +void *sna_static_stream_map(struct sna_static_stream *stream, + uint32_t len, uint32_t align); +uint32_t sna_static_stream_offsetof(struct sna_static_stream *stream, + void *ptr); +struct kgem_bo *sna_static_stream_fini(struct sna *sna, + struct sna_static_stream *stream); + +struct kgem_bo * +sna_render_get_solid(struct sna *sna, + uint32_t color); + +void +sna_render_flush_solid(struct sna *sna); + +struct kgem_bo * +sna_render_get_gradient(struct sna *sna, + PictGradient *pattern); + +uint32_t sna_rgba_for_color(uint32_t color, int depth); +Bool sna_picture_is_solid(PicturePtr picture, uint32_t *color); + +void no_render_init(struct sna *sna); + +#ifdef SNA_GEN2 +Bool gen2_render_init(struct sna *sna); +#else +static inline Bool gen2_render_init(struct sna *sna) { return FALSE; } +#endif + +#ifdef SNA_GEN3 +Bool gen3_render_init(struct sna *sna); +#else +static inline Bool gen3_render_init(struct sna *sna) { return FALSE; } +#endif + +#ifdef SNA_GEN4 +Bool gen4_render_init(struct sna *sna); +#else +static inline Bool gen4_render_init(struct sna *sna) { return FALSE; } +#endif + +#ifdef SNA_GEN5 +Bool gen5_render_init(struct sna *sna); +#else +static inline Bool gen5_render_init(struct sna *sna) { return FALSE; } +#endif + +#ifdef SNA_GEN6 +Bool gen6_render_init(struct sna *sna); +#else +static inline Bool gen6_render_init(struct sna *sna) { return FALSE; } +#endif + +Bool sna_tiling_composite(struct sna *sna, + uint32_t op, + PicturePtr src, + PicturePtr mask, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t mask_x, int16_t mask_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp); + +Bool sna_blt_composite(struct sna *sna, + uint32_t op, + PicturePtr src, + PicturePtr dst, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + int16_t width, int16_t height, + struct sna_composite_op *tmp); + +bool sna_blt_fill(struct sna *sna, uint8_t alu, + struct kgem_bo *bo, + int bpp, + uint32_t pixel, + struct sna_fill_op *fill); + +bool sna_blt_copy(struct sna *sna, uint8_t alu, + struct kgem_bo *src, + struct kgem_bo *dst, + int bpp, + struct sna_copy_op *copy); + +Bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *bo, + int bpp, + uint32_t pixel, + const BoxRec *box, int n); + +Bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, + struct kgem_bo *src_bo, int16_t src_dx, int16_t src_dy, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + int bpp, + const BoxRec *box, int n); + +Bool sna_get_pixel_from_rgba(uint32_t *pixel, + uint16_t red, + uint16_t green, + uint16_t blue, + uint16_t alpha, + uint32_t format); + +int +sna_render_pixmap_bo(struct sna *sna, + struct sna_composite_channel *channel, + PixmapPtr pixmap, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_extract(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_fixup(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +int +sna_render_picture_convert(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + PixmapPtr pixmap, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y); + +Bool +sna_render_composite_redirect(struct sna *sna, + struct sna_composite_op *op, + int x, int y, int width, int height); + +void +sna_render_composite_redirect_done(struct sna *sna, + const struct sna_composite_op *op); + +#endif /* SNA_RENDER_H */ |