From f464d508c870293699616626d64bd64f16051467 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 11 Aug 2012 10:10:32 +0100 Subject: sna/gen6+: Try to use the BLT to avoid TLB misses Signed-off-by: Chris Wilson --- src/sna/gen6_render.c | 71 +++++++++++++++------------- src/sna/gen7_render.c | 74 +++++++++++++++++------------- src/sna/sna_blt.c | 125 ++++++++++++++++++++++++++++++++++++++++++++++++++ src/sna/sna_render.h | 5 ++ 4 files changed, 212 insertions(+), 63 deletions(-) (limited to 'src') diff --git a/src/sna/gen6_render.c b/src/sna/gen6_render.c index 6d4d79e4..f8b1e711 100644 --- a/src/sna/gen6_render.c +++ b/src/sna/gen6_render.c @@ -2267,6 +2267,7 @@ gen6_composite_picture(struct sna *sna, } else channel->transform = picture->transform; + channel->pict_format = picture->format; channel->card_format = gen6_get_card_format(picture->format); if (channel->card_format == (unsigned)-1) return sna_render_picture_convert(sna, picture, channel, pixmap, @@ -2369,6 +2370,16 @@ static bool can_switch_rings(struct sna *sna) return sna->kgem.mode == KGEM_NONE && sna->kgem.has_semaphores && !NO_RING_SWITCH; } +static inline bool untiled_tlb_miss(struct kgem_bo *bo) +{ + return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096; +} + +static bool prefer_blt_bo(struct sna *sna, struct kgem_bo *bo) +{ + return untiled_tlb_miss(bo) && kgem_bo_can_blt(&sna->kgem, bo); +} + static bool try_blt(struct sna *sna, PicturePtr dst, PicturePtr src, @@ -2606,6 +2617,19 @@ reuse_source(struct sna *sna, return true; } +static bool +prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp) +{ + if (sna->kgem.ring == KGEM_BLT) + return true; + + if (!prefer_blt_ring(sna)) + return false; + + return (prefer_blt_bo(sna, tmp->dst.bo) || + prefer_blt_bo(sna, tmp->src.bo)); +} + static bool gen6_render_composite(struct sna *sna, uint8_t op, @@ -2677,23 +2701,20 @@ gen6_render_composite(struct sna *sna, gen6_composite_solid_init(sna, &tmp->src, 0); /* fall through to fixup */ 
case 1: + /* Did we just switch rings to prepare the source? */ + if (mask == NULL && + prefer_blt_composite(sna, tmp) && + sna_blt_composite__convert(sna, + src_x, src_y, + width, height, + dst_x, dst_y, + tmp)) + return true; + gen6_composite_channel_convert(&tmp->src); break; } - /* Did we just switch rings to prepare the source? */ - if (sna->kgem.ring == KGEM_BLT && mask == NULL && - sna_blt_composite(sna, op, - src, dst, - src_x, src_y, - dst_x, dst_y, - width, height, tmp)) { - if (tmp->redirect.real_bo) - kgem_bo_destroy(&sna->kgem, tmp->redirect.real_bo); - kgem_bo_destroy(&sna->kgem, tmp->src.bo); - return true; - } - tmp->is_affine = tmp->src.is_affine; tmp->has_component_alpha = false; tmp->need_magic_ca_pass = false; @@ -3216,21 +3237,9 @@ gen6_emit_copy_state(struct sna *sna, gen6_emit_state(sna, op, offset | dirty); } -static inline bool untiled_tlb_miss(struct kgem_bo *bo) -{ - return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096; -} - -static bool prefer_blt_bo(struct sna *sna, - PixmapPtr pixmap, - struct kgem_bo *bo) -{ - return untiled_tlb_miss(bo) && kgem_bo_can_blt(&sna->kgem, bo); -} - static inline bool prefer_blt_copy(struct sna *sna, - PixmapPtr src, struct kgem_bo *src_bo, - PixmapPtr dst, struct kgem_bo *dst_bo, + struct kgem_bo *src_bo, + struct kgem_bo *dst_bo, unsigned flags) { if (PREFER_RENDER) @@ -3238,8 +3247,8 @@ static inline bool prefer_blt_copy(struct sna *sna, return (sna->kgem.ring == KGEM_BLT || (flags & COPY_LAST && sna->kgem.mode == KGEM_NONE) || - prefer_blt_bo(sna, src, src_bo) || - prefer_blt_bo(sna, dst, dst_bo)); + prefer_blt_bo(sna, src_bo) || + prefer_blt_bo(sna, dst_bo)); } static inline bool @@ -3289,7 +3298,7 @@ gen6_render_copy_boxes(struct sna *sna, uint8_t alu, dst_bo, dst_dx, dst_dy, box, n, &extents))); - if (prefer_blt_copy(sna, src, src_bo, dst, dst_bo, flags) && + if (prefer_blt_copy(sna, src_bo, dst_bo, flags) && sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy_boxes(sna, alu, 
src_bo, src_dx, src_dy, @@ -3528,7 +3537,7 @@ gen6_render_copy(struct sna *sna, uint8_t alu, src->drawable.width, src->drawable.height, dst->drawable.width, dst->drawable.height)); - if (prefer_blt_copy(sna, src, src_bo, dst, dst_bo, 0) && + if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy(sna, alu, src_bo, dst_bo, diff --git a/src/sna/gen7_render.c b/src/sna/gen7_render.c index 954e42fb..318cbef8 100644 --- a/src/sna/gen7_render.c +++ b/src/sna/gen7_render.c @@ -2367,6 +2367,7 @@ gen7_composite_picture(struct sna *sna, } else channel->transform = picture->transform; + channel->pict_format = picture->format; channel->card_format = gen7_get_card_format(picture->format); if (channel->card_format == (unsigned)-1) return sna_render_picture_convert(sna, picture, channel, pixmap, @@ -2457,6 +2458,16 @@ inline static bool can_switch_rings(struct sna *sna) return sna->kgem.mode == KGEM_NONE && sna->kgem.has_semaphores && !NO_RING_SWITCH; } +static inline bool untiled_tlb_miss(struct kgem_bo *bo) +{ + return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096; +} + +static bool prefer_blt_bo(struct sna *sna, struct kgem_bo *bo) +{ + return untiled_tlb_miss(bo) && kgem_bo_can_blt(&sna->kgem, bo); +} + inline static bool prefer_blt_ring(struct sna *sna) { return sna->kgem.ring != KGEM_RENDER || can_switch_rings(sna); @@ -2708,6 +2719,19 @@ reuse_source(struct sna *sna, return true; } +static bool +prefer_blt_composite(struct sna *sna, struct sna_composite_op *tmp) +{ + if (sna->kgem.ring == KGEM_BLT) + return true; + + if (!prefer_blt_ring(sna)) + return false; + + return (prefer_blt_bo(sna, tmp->dst.bo) || + prefer_blt_bo(sna, tmp->src.bo)); +} + static bool gen7_render_composite(struct sna *sna, uint8_t op, @@ -2752,7 +2776,8 @@ gen7_render_composite(struct sna *sna, if (!gen7_composite_set_target(sna, tmp, dst)) return false; - if (mask == NULL && sna->kgem.mode == KGEM_BLT && + if (mask == NULL && + 
sna->kgem.mode == KGEM_BLT && sna_blt_composite(sna, op, src, dst, src_x, src_y, @@ -2779,23 +2804,20 @@ gen7_render_composite(struct sna *sna, gen7_composite_solid_init(sna, &tmp->src, 0); /* fall through to fixup */ case 1: + /* Did we just switch rings to prepare the source? */ + if (mask == NULL && + prefer_blt_composite(sna, tmp) && + sna_blt_composite__convert(sna, + src_x, src_y, + width, height, + dst_x, dst_y, + tmp)) + return true; + gen7_composite_channel_convert(&tmp->src); break; } - /* Did we just switch rings to prepare the source? */ - if (sna->kgem.ring == KGEM_BLT && mask == NULL && - sna_blt_composite(sna, op, - src, dst, - src_x, src_y, - dst_x, dst_y, - width, height, tmp)) { - if (tmp->redirect.real_bo) - kgem_bo_destroy(&sna->kgem, tmp->redirect.real_bo); - kgem_bo_destroy(&sna->kgem, tmp->src.bo); - return true; - } - tmp->is_affine = tmp->src.is_affine; tmp->has_component_alpha = false; tmp->need_magic_ca_pass = false; @@ -3305,27 +3327,15 @@ gen7_emit_copy_state(struct sna *sna, gen7_emit_state(sna, op, offset); } -static inline bool untiled_tlb_miss(struct kgem_bo *bo) -{ - return bo->tiling == I915_TILING_NONE && bo->pitch >= 4096; -} - -static bool prefer_blt_bo(struct sna *sna, - PixmapPtr pixmap, - struct kgem_bo *bo) -{ - return untiled_tlb_miss(bo) && kgem_bo_can_blt(&sna->kgem, bo); -} - static inline bool prefer_blt_copy(struct sna *sna, - PixmapPtr src, struct kgem_bo *src_bo, - PixmapPtr dst, struct kgem_bo *dst_bo, + struct kgem_bo *src_bo, + struct kgem_bo *dst_bo, unsigned flags) { return (sna->kgem.ring == KGEM_BLT || (flags & COPY_LAST && sna->kgem.mode == KGEM_NONE) || - prefer_blt_bo(sna, src, src_bo) || - prefer_blt_bo(sna, dst, dst_bo)); + prefer_blt_bo(sna, src_bo) || + prefer_blt_bo(sna, dst_bo)); } static inline bool @@ -3375,7 +3385,7 @@ gen7_render_copy_boxes(struct sna *sna, uint8_t alu, dst_bo, dst_dx, dst_dy, box, n, &extents))); - if (prefer_blt_copy(sna, src, src_bo, dst, dst_bo, flags) && + if 
(prefer_blt_copy(sna, src_bo, dst_bo, flags) && sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy_boxes(sna, alu, src_bo, src_dx, src_dy, @@ -3604,7 +3614,7 @@ gen7_render_copy(struct sna *sna, uint8_t alu, src->drawable.width, src->drawable.height, dst->drawable.width, dst->drawable.height)); - if (prefer_blt_copy(sna, src, src_bo, dst, dst_bo, 0) && + if (prefer_blt_copy(sna, src_bo, dst_bo, 0) && sna_blt_compare_depth(&src->drawable, &dst->drawable) && sna_blt_copy(sna, alu, src_bo, dst_bo, diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c index 6bf223ab..77fca3dd 100644 --- a/src/sna/sna_blt.c +++ b/src/sna/sna_blt.c @@ -1674,6 +1674,131 @@ clear: return ret; } +static void convert_done(struct sna *sna, const struct sna_composite_op *op) +{ + struct kgem *kgem = &sna->kgem; + + if (kgem->gen >= 60 && kgem_check_batch(kgem, 3)) { + uint32_t *b = kgem->batch + kgem->nbatch; + b[0] = XY_SETUP_CLIP; + b[1] = b[2] = 0; + kgem->nbatch += 3; + } + + kgem_bo_destroy(kgem, op->src.bo); + sna_render_composite_redirect_done(sna, op); +} + +bool +sna_blt_composite__convert(struct sna *sna, + int x, int y, + int width, int height, + int dst_x, int dst_y, + struct sna_composite_op *tmp) +{ + uint32_t alpha_fixup; + uint8_t op; + +#if DEBUG_NO_BLT || NO_BLT_COMPOSITE + return false; +#endif + + DBG(("%s\n", __FUNCTION__)); + + if (!kgem_bo_can_blt(&sna->kgem, tmp->dst.bo) || + !kgem_bo_can_blt(&sna->kgem, tmp->src.bo)) { + DBG(("%s: cannot blt from src or to dst\n", __FUNCTION__)); + return false; + } + + if (tmp->src.transform) { + DBG(("%s: transforms not handled by the BLT\n", __FUNCTION__)); + return false; + } + + if (tmp->src.filter == PictFilterConvolution) { + DBG(("%s: convolutions filters not handled\n", + __FUNCTION__)); + return false; + } + + op = tmp->op; + if (op == PictOpOver && PICT_FORMAT_A(tmp->src.pict_format) == 0) + op = PictOpSrc; + if (op != PictOpSrc) { + DBG(("%s: unsuported op [%d] for blitting\n", + __FUNCTION__, op)); + return false; + } + 
+ alpha_fixup = 0; + if (!(tmp->dst.format == tmp->src.pict_format || + tmp->dst.format == alphaless(tmp->src.pict_format) || + (alphaless(tmp->dst.format) == alphaless(tmp->src.pict_format) && + sna_get_pixel_from_rgba(&alpha_fixup, + 0, 0, 0, 0xffff, + tmp->dst.format)))) { + DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n", + __FUNCTION__, + (unsigned)tmp->src.pict_format, + tmp->dst.format)); + return false; + } + + x += tmp->src.offset[0]; + y += tmp->src.offset[1]; + if (x < 0 || y < 0 || + x + width > tmp->src.width || + y + height > tmp->src.height) { + DBG(("%s: source extends outside (%d, %d), (%d, %d) of valid drawable %dx%d\n", + __FUNCTION__, + x, y, x+width, y+height, tmp->src.width, tmp->src.height)); + return false; + } + + if (!kgem_check_many_bo_fenced(&sna->kgem, tmp->dst.bo, tmp->src.bo, NULL)) { + _kgem_submit(&sna->kgem); + if (!kgem_check_many_bo_fenced(&sna->kgem, + tmp->dst.bo, tmp->src.bo, NULL)) { + DBG(("%s: fallback -- no room in aperture\n", __FUNCTION__)); + return false; + } + _kgem_set_mode(&sna->kgem, KGEM_BLT); + } + + tmp->u.blt.src_pixmap = NULL; + tmp->u.blt.sx = x - dst_x; + tmp->u.blt.sy = y - dst_y; + DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? 
%x\n", + __FUNCTION__, + tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup)); + + if (alpha_fixup) { + tmp->blt = blt_composite_copy_with_alpha; + tmp->box = blt_composite_copy_box_with_alpha; + tmp->boxes = blt_composite_copy_boxes_with_alpha; + + if (!sna_blt_alpha_fixup_init(sna, &tmp->u.blt, + tmp->src.bo, tmp->dst.bo, + PICT_FORMAT_BPP(tmp->src.pict_format), + alpha_fixup)) + return false; + } else { + tmp->blt = blt_composite_copy; + tmp->box = blt_composite_copy_box; + tmp->boxes = blt_composite_copy_boxes; + + if (!sna_blt_copy_init(sna, &tmp->u.blt, + tmp->src.bo, tmp->dst.bo, + PICT_FORMAT_BPP(tmp->src.pict_format), + GXcopy)) + return false; + } + + tmp->done = convert_done; + return true; +} + static void sna_blt_fill_op_blt(struct sna *sna, const struct sna_fill_op *op, int16_t x, int16_t y, diff --git a/src/sna/sna_render.h b/src/sna/sna_render.h index a2bcb45e..b079178b 100644 --- a/src/sna/sna_render.h +++ b/src/sna/sna_render.h @@ -570,6 +570,11 @@ bool sna_blt_composite(struct sna *sna, int16_t dst_x, int16_t dst_y, int16_t width, int16_t height, struct sna_composite_op *tmp); +bool sna_blt_composite__convert(struct sna *sna, + int x, int y, + int width, int height, + int dst_x, int dst_y, + struct sna_composite_op *tmp); bool sna_blt_fill(struct sna *sna, uint8_t alu, struct kgem_bo *bo, -- cgit v1.2.3