diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2012-01-06 15:26:11 +0000 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2012-01-06 17:50:01 +0000 |
commit | 9f1935bb4e894264053d94e53c99d5ad607700fb (patch) | |
tree | 689fdc95dda28ba227cf26d9ee2347a303962bb8 /src | |
parent | 141001df6c9c3485c500ed531a214c09b46c1d3b (diff) |
sna: Support performing alpha-fixup on the source
By fixing up the alpha channel inline (OR-ing a constant alpha value into each
pixel as it is copied) we can support BLT copies from an alpha-less pixmap to a destination with an alpha channel.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src')
-rw-r--r-- | src/sna/blt.c | 105 | ||||
-rw-r--r-- | src/sna/sna.h | 13 | ||||
-rw-r--r-- | src/sna/sna_blt.c | 356 | ||||
-rw-r--r-- | src/sna/sna_io.c | 241 | ||||
-rw-r--r-- | src/sna/sna_reg.h | 1 |
5 files changed, 682 insertions, 34 deletions
diff --git a/src/sna/blt.c b/src/sna/blt.c index 7a77fa49..d28ad985 100644 --- a/src/sna/blt.c +++ b/src/sna/blt.c @@ -106,3 +106,108 @@ memcpy_blt(const void *src, void *dst, int bpp, break; } } + +void +memcpy_xor(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height, + uint32_t and, uint32_t or) +{ + uint8_t *src_bytes; + uint8_t *dst_bytes; + int i; + + assert(width && height); + assert(bpp >= 8); + + DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d, bpp=%d, and=%x, xor=%x\n", + __FUNCTION__, + src_x, src_y, dst_x, dst_y, + width, height, + src_stride, dst_stride, + bpp, and, or)); + + bpp /= 8; + src_bytes = (uint8_t *)src + src_stride * src_y + src_x * bpp; + dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * bpp; + + if (and == 0xffffffff) { + switch (bpp) { + case 1: + do { + for (i = 0; i < width; i++) + dst_bytes[i] = src_bytes[i] | or; + + src_bytes += src_stride; + dst_bytes += dst_stride; + } while (--height); + break; + + case 2: + do { + uint16_t *d = (uint16_t *)dst_bytes; + uint16_t *s = (uint16_t *)src_bytes; + + for (i = 0; i < width; i++) + d[i] = s[i] | or; + + src_bytes += src_stride; + dst_bytes += dst_stride; + } while (--height); + break; + + case 4: + do { + uint32_t *d = (uint32_t *)dst_bytes; + uint32_t *s = (uint32_t *)src_bytes; + + for (i = 0; i < width; i++) + d[i] = s[i] | or; + + src_bytes += src_stride; + dst_bytes += dst_stride; + } while (--height); + break; + } + } else { + switch (bpp) { + case 1: + do { + for (i = 0; i < width; i++) + dst_bytes[i] = (src_bytes[i] & and) | or; + + src_bytes += src_stride; + dst_bytes += dst_stride; + } while (--height); + break; + + case 2: + do { + uint16_t *d = (uint16_t *)dst_bytes; + uint16_t *s = (uint16_t *)src_bytes; + + for (i = 0; i < width; i++) + d[i] = (s[i] & and) | or; + + src_bytes += src_stride; + dst_bytes += dst_stride; + } while 
(--height); + break; + + case 4: + do { + uint32_t *d = (uint32_t *)dst_bytes; + uint32_t *s = (uint32_t *)src_bytes; + + for (i = 0; i < width; i++) + d[i] = (s[i] & and) | or; + + src_bytes += src_stride; + dst_bytes += dst_stride; + } while (--height); + break; + } + } +} diff --git a/src/sna/sna.h b/src/sna/sna.h index f16324e0..de4de5c8 100644 --- a/src/sna/sna.h +++ b/src/sna/sna.h @@ -685,6 +685,11 @@ void sna_write_boxes(struct sna *sna, PixmapPtr dst, struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, const void *src, int stride, int16_t src_dx, int16_t src_dy, const BoxRec *box, int n); +void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const void *src, int stride, int16_t src_dx, int16_t src_dy, + const BoxRec *box, int nbox, + uint32_t and, uint32_t or); struct kgem_bo *sna_replace(struct sna *sna, PixmapPtr pixmap, @@ -713,6 +718,14 @@ memcpy_blt(const void *src, void *dst, int bpp, int16_t dst_x, int16_t dst_y, uint16_t width, uint16_t height); +void +memcpy_xor(const void *src, void *dst, int bpp, + int32_t src_stride, int32_t dst_stride, + int16_t src_x, int16_t src_y, + int16_t dst_x, int16_t dst_y, + uint16_t width, uint16_t height, + uint32_t and, uint32_t or); + #define SNA_CREATE_FB 0x10 #define SNA_CREATE_SCRATCH 0x11 #define SNA_CREATE_GLYPH 0x12 diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c index 5879e973..07771a90 100644 --- a/src/sna/sna_blt.c +++ b/src/sna/sna_blt.c @@ -307,6 +307,104 @@ static Bool sna_blt_copy_init(struct sna *sna, return TRUE; } +static Bool sna_blt_alpha_fixup_init(struct sna *sna, + struct sna_blt_state *blt, + struct kgem_bo *src, + struct kgem_bo *dst, + int bpp, uint32_t alpha) +{ + struct kgem *kgem = &sna->kgem; + + blt->bo[0] = src; + blt->bo[1] = dst; + + blt->cmd = XY_FULL_MONO_PATTERN_BLT; + blt->pitch[0] = src->pitch; + if (kgem->gen >= 40 && src->tiling) { + blt->cmd |= BLT_SRC_TILED; + blt->pitch[0] >>= 2; + } + 
assert(blt->pitch[0] < MAXSHORT); + + blt->pitch[1] = dst->pitch; + if (kgem->gen >= 40 && dst->tiling) { + blt->cmd |= BLT_DST_TILED; + blt->pitch[1] >>= 2; + } + assert(blt->pitch[1] < MAXSHORT); + + blt->overwrites = 1; + blt->br13 = (0xfc << 16) | blt->pitch[1]; + switch (bpp) { + default: assert(0); + case 32: blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + blt->br13 |= 1 << 25; /* RGB8888 */ + case 16: blt->br13 |= 1 << 24; /* RGB565 */ + case 8: break; + } + blt->pixel = alpha; + + kgem_set_mode(kgem, KGEM_BLT); + if (!kgem_check_bo_fenced(kgem, src, dst, NULL)) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } + + sna->blt_state.fill_bo = 0; + return TRUE; +} + +static void sna_blt_alpha_fixup_one(struct sna *sna, + const struct sna_blt_state *blt, + int src_x, int src_y, + int width, int height, + int dst_x, int dst_y) +{ + struct kgem *kgem = &sna->kgem; + uint32_t *b; + + DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n", + __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height)); + + assert(src_x >= 0); + assert(src_y >= 0); + assert((src_y + height) * blt->bo[0]->pitch <= blt->bo[0]->size); + assert(dst_x >= 0); + assert(dst_y >= 0); + assert((dst_y + height) * blt->bo[1]->pitch <= blt->bo[1]->size); + assert(width > 0); + assert(height > 0); + + if (!kgem_check_batch(kgem, 12) || !kgem_check_reloc(kgem, 2)) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } + + b = kgem->batch + kgem->nbatch; + b[0] = blt->cmd; + b[1] = blt->br13; + b[2] = (dst_y << 16) | dst_x; + b[3] = ((dst_y + height) << 16) | (dst_x + width); + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, + blt->bo[1], + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = blt->pitch[0]; + b[6] = (src_y << 16) | src_x; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, + blt->bo[0], + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[8] = blt->pixel; + b[9] = blt->pixel; + b[10] = 0; + b[11] = 0; + kgem->nbatch += 12; +} + 
static void sna_blt_copy_one(struct sna *sna, const struct sna_blt_state *blt, int src_x, int src_y, @@ -930,9 +1028,90 @@ static void blt_composite_copy_boxes(struct sna *sna, } while(--nbox); } +fastcall static void +blt_composite_copy_with_alpha(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + int x1, x2, y1, y2; + int src_x, src_y; + + DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n", + __FUNCTION__, + r->src.x, r->src.y, + r->dst.x, r->dst.y, + r->width, r->height)); + + /* XXX higher layer should have clipped? */ + + x1 = r->dst.x + op->dst.x; + y1 = r->dst.y + op->dst.y; + x2 = x1 + r->width; + y2 = y1 + r->height; + + src_x = r->src.x - x1; + src_y = r->src.y - y1; + + /* clip against dst */ + if (x1 < 0) + x1 = 0; + if (y1 < 0) + y1 = 0; + + if (x2 > op->dst.width) + x2 = op->dst.width; + + if (y2 > op->dst.height) + y2 = op->dst.height; + + DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2)); + + if (x2 <= x1 || y2 <= y1) + return; + + sna_blt_alpha_fixup_one(sna, &op->u.blt, + x1 + src_x, y1 + src_y, + x2 - x1, y2 - y1, + x1, y1); +} + +fastcall static void +blt_composite_copy_box_with_alpha(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box) +{ + DBG(("%s: box (%d, %d), (%d, %d)\n", + __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); + sna_blt_alpha_fixup_one(sna, &op->u.blt, + box->x1 + op->u.blt.sx, + box->y1 + op->u.blt.sy, + box->x2 - box->x1, + box->y2 - box->y1, + box->x1 + op->dst.x, + box->y1 + op->dst.y); +} + +static void +blt_composite_copy_boxes_with_alpha(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + do { + DBG(("%s: box (%d, %d), (%d, %d)\n", + __FUNCTION__, box->x1, box->y1, box->x2, box->y2)); + sna_blt_alpha_fixup_one(sna, &op->u.blt, + box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy, + box->x2 - box->x1, box->y2 - box->y1, + box->x1 + op->dst.x, 
box->y1 + op->dst.y); + box++; + } while(--nbox); +} + static Bool prepare_blt_copy(struct sna *sna, - struct sna_composite_op *op) + struct sna_composite_op *op, + uint32_t alpha_fixup) { PixmapPtr src = op->u.blt.src_pixmap; struct sna_pixmap *priv = sna_pixmap(src); @@ -947,19 +1126,32 @@ prepare_blt_copy(struct sna *sna, DBG(("%s\n", __FUNCTION__)); - op->blt = blt_composite_copy; - op->box = blt_composite_copy_box; - op->boxes = blt_composite_copy_boxes; if (sna->kgem.gen >= 60) op->done = gen6_blt_copy_done; else op->done = blt_done; - return sna_blt_copy_init(sna, &op->u.blt, - priv->gpu_bo, - op->dst.bo, - src->drawable.bitsPerPixel, - GXcopy); + if (alpha_fixup) { + op->blt = blt_composite_copy_with_alpha; + op->box = blt_composite_copy_box_with_alpha; + op->boxes = blt_composite_copy_boxes_with_alpha; + + return sna_blt_alpha_fixup_init(sna, &op->u.blt, + priv->gpu_bo, + op->dst.bo, + src->drawable.bitsPerPixel, + alpha_fixup); + } else { + op->blt = blt_composite_copy; + op->box = blt_composite_copy_box; + op->boxes = blt_composite_copy_boxes; + + return sna_blt_copy_init(sna, &op->u.blt, + priv->gpu_bo, + op->dst.bo, + src->drawable.bitsPerPixel, + GXcopy); + } } static void blt_vmap_done(struct sna *sna, const struct sna_composite_op *op) @@ -1082,9 +1274,80 @@ static void blt_put_composite_boxes(struct sna *sna, } } +fastcall static void +blt_put_composite_with_alpha(struct sna *sna, + const struct sna_composite_op *op, + const struct sna_composite_rectangles *r) +{ + PixmapPtr dst = op->dst.pixmap; + PixmapPtr src = op->u.blt.src_pixmap; + struct sna_pixmap *dst_priv = sna_pixmap(dst); + int pitch = src->devKind; + char *data = src->devPrivate.ptr; + + int16_t dst_x = r->dst.x + op->dst.x; + int16_t dst_y = r->dst.y + op->dst.y; + int16_t src_x = r->src.x + op->u.blt.sx; + int16_t src_y = r->src.y + op->u.blt.sy; + BoxRec box; + + box.x1 = dst_x; + box.y1 = dst_y; + box.x2 = dst_x + r->width; + box.y2 = dst_y + r->height; + + 
sna_write_boxes__xor(sna, dst, + dst_priv->gpu_bo, 0, 0, + data, pitch, src_x, src_y, + &box, 1, + 0xffffffff, op->u.blt.pixel); +} + +fastcall static void +blt_put_composite_box_with_alpha(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box) +{ + PixmapPtr src = op->u.blt.src_pixmap; + + DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__, + op->u.blt.sx, op->u.blt.sy, + op->dst.x, op->dst.y)); + + sna_write_boxes__xor(sna, op->dst.pixmap, + op->dst.bo, op->dst.x, op->dst.y, + src->devPrivate.ptr, + src->devKind, + op->u.blt.sx, op->u.blt.sy, + box, 1, + 0xffffffff, op->u.blt.pixel); +} + +static void +blt_put_composite_boxes_with_alpha(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int n) +{ + PixmapPtr src = op->u.blt.src_pixmap; + + DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__, + op->u.blt.sx, op->u.blt.sy, + op->dst.x, op->dst.y, + box->x1, box->y1, box->x2, box->y2, n)); + + sna_write_boxes__xor(sna, op->dst.pixmap, + op->dst.bo, op->dst.x, op->dst.y, + src->devPrivate.ptr, + src->devKind, + op->u.blt.sx, op->u.blt.sy, + box, n, + 0xffffffff, op->u.blt.pixel); +} + static Bool prepare_blt_put(struct sna *sna, - struct sna_composite_op *op) + struct sna_composite_op *op, + uint32_t alpha_fixup) { PixmapPtr src = op->u.blt.src_pixmap; struct sna_pixmap *priv = sna_pixmap(src); @@ -1105,26 +1368,43 @@ prepare_blt_put(struct sna *sna, free_bo = src_bo; } if (src_bo) { - op->blt = blt_composite_copy; - op->box = blt_composite_copy_box; - op->boxes = blt_composite_copy_boxes; - op->u.blt.src_pixmap = (void *)free_bo; op->done = blt_vmap_done; src_bo->pitch = src->devKind; - if (!sna_blt_copy_init(sna, &op->u.blt, - src_bo, op->dst.bo, - op->dst.pixmap->drawable.bitsPerPixel, - GXcopy)) - return FALSE; + if (alpha_fixup) { + op->blt = blt_composite_copy_with_alpha; + op->box = blt_composite_copy_box_with_alpha; + op->boxes = blt_composite_copy_boxes_with_alpha; + + return 
sna_blt_alpha_fixup_init(sna, &op->u.blt, + src_bo, op->dst.bo, + op->dst.pixmap->drawable.bitsPerPixel, + alpha_fixup); + } else { + op->blt = blt_composite_copy; + op->box = blt_composite_copy_box; + op->boxes = blt_composite_copy_boxes; + + return sna_blt_copy_init(sna, &op->u.blt, + src_bo, op->dst.bo, + op->dst.pixmap->drawable.bitsPerPixel, + GXcopy); + } } else { if (!sna_pixmap_move_to_cpu(src, MOVE_READ)) return FALSE; - op->blt = blt_put_composite; - op->box = blt_put_composite_box; - op->boxes = blt_put_composite_boxes; + if (alpha_fixup) { + op->u.blt.pixel = alpha_fixup; + op->blt = blt_put_composite_with_alpha; + op->box = blt_put_composite_box_with_alpha; + op->boxes = blt_put_composite_boxes_with_alpha; + } else { + op->blt = blt_put_composite; + op->box = blt_put_composite_box; + op->boxes = blt_put_composite_boxes; + } op->done = nop_done; } @@ -1209,6 +1489,13 @@ reduce_damage(struct sna_composite_op *op, op->damage = NULL; } +#define alphaless(format) PICT_FORMAT(PICT_FORMAT_BPP(format), \ + PICT_FORMAT_TYPE(format), \ + 0, \ + PICT_FORMAT_R(format), \ + PICT_FORMAT_G(format), \ + PICT_FORMAT_B(format)) + Bool sna_blt_composite(struct sna *sna, uint32_t op, @@ -1223,6 +1510,7 @@ sna_blt_composite(struct sna *sna, PictFormat src_format = src->format; struct sna_pixmap *priv; int16_t tx, ty; + uint32_t alpha_fixup; Bool ret; #if DEBUG_NO_BLT || NO_BLT_COMPOSITE @@ -1309,13 +1597,13 @@ sna_blt_composite(struct sna *sna, return FALSE; } + alpha_fixup = 0; if (!(dst->format == src_format || - dst->format == PICT_FORMAT(PICT_FORMAT_BPP(src_format), - PICT_FORMAT_TYPE(src_format), - 0, - PICT_FORMAT_R(src_format), - PICT_FORMAT_G(src_format), - PICT_FORMAT_B(src_format)))) { + dst->format == alphaless(src_format) || + (alphaless(dst->format) == alphaless(src_format) && + sna_get_pixel_from_rgba(&alpha_fixup, + 0, 0, 0, 0xffff, + dst->format)))) { DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n", __FUNCTION__, (unsigned)src_format, 
dst->format)); return FALSE; @@ -1349,18 +1637,18 @@ sna_blt_composite(struct sna *sna, tmp->u.blt.sx = x - dst_x; tmp->u.blt.sy = y - dst_y; - DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d)\n", + DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n", __FUNCTION__, - tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy)); + tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup)); if (has_gpu_area(blt->src_pixmap, x, y, width, height)) - ret = prepare_blt_copy(sna, tmp); + ret = prepare_blt_copy(sna, tmp, alpha_fixup); else if (has_cpu_area(blt->src_pixmap, x, y, width, height)) - ret = prepare_blt_put(sna, tmp); + ret = prepare_blt_put(sna, tmp, alpha_fixup); else if (sna_pixmap_move_to_gpu(blt->src_pixmap, MOVE_READ)) - ret = prepare_blt_copy(sna, tmp); + ret = prepare_blt_copy(sna, tmp, alpha_fixup); else - ret = prepare_blt_put(sna, tmp); + ret = prepare_blt_put(sna, tmp, alpha_fixup); return ret; } diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c index c5e66f14..aef3f509 100644 --- a/src/sna/sna_io.c +++ b/src/sna/sna_io.c @@ -504,6 +504,247 @@ fallback: sna->blt_state.fill_bo = 0; } +static void +write_boxes_inplace__xor(struct kgem *kgem, + const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy, + struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy, + const BoxRec *box, int n, + uint32_t and, uint32_t or) +{ + int dst_pitch = bo->pitch; + int src_pitch = stride; + void *dst; + + DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling)); + + kgem_bo_submit(kgem, bo); + + dst = kgem_bo_map(kgem, bo, PROT_READ | PROT_WRITE); + if (dst == NULL) + return; + + do { + DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__, + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + box->x2 - box->x1, box->y2 - box->y1, + bpp, src_pitch, dst_pitch)); + + memcpy_xor(src, dst, bpp, + src_pitch, dst_pitch, + box->x1 + src_dx, box->y1 + src_dy, + 
box->x1 + dst_dx, box->y1 + dst_dy, + box->x2 - box->x1, box->y2 - box->y1, + and, or); + box++; + } while (--n); +} + +void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst, + struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy, + const void *src, int stride, int16_t src_dx, int16_t src_dy, + const BoxRec *box, int nbox, + uint32_t and, uint32_t or) +{ + struct kgem *kgem = &sna->kgem; + struct kgem_bo *src_bo; + void *ptr; + int offset; + int n, cmd, br13; + + DBG(("%s x %d\n", __FUNCTION__, nbox)); + + if (DEBUG_NO_IO || kgem->wedged || + !kgem_bo_map_will_stall(kgem, dst_bo)) { +fallback: + write_boxes_inplace__xor(kgem, + src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy, + dst_bo, dst_dx, dst_dy, + box, nbox, + and, or); + return; + } + + /* Try to avoid switching rings... */ + if (dst_bo->tiling == I915_TILING_Y || kgem->ring == KGEM_RENDER) { + PixmapRec tmp; + BoxRec extents; + + /* XXX Composite? Not that we should ever reach here! */ + + extents = box[0]; + for (n = 1; n < nbox; n++) { + if (box[n].x1 < extents.x1) + extents.x1 = box[n].x1; + if (box[n].x2 > extents.x2) + extents.x2 = box[n].x2; + + if (box[n].y1 < extents.y1) + extents.y1 = box[n].y1; + if (box[n].y2 > extents.y2) + extents.y2 = box[n].y2; + } + + tmp.drawable.width = extents.x2 - extents.x1; + tmp.drawable.height = extents.y2 - extents.y1; + tmp.drawable.depth = dst->drawable.depth; + tmp.drawable.bitsPerPixel = dst->drawable.bitsPerPixel; + tmp.devPrivate.ptr = NULL; + + assert(tmp.drawable.width); + assert(tmp.drawable.height); + + tmp.devKind = tmp.drawable.width * tmp.drawable.bitsPerPixel / 8; + tmp.devKind = ALIGN(tmp.devKind, 4); + + src_bo = kgem_create_buffer(kgem, + tmp.drawable.height * tmp.devKind, + KGEM_BUFFER_WRITE, + &ptr); + if (!src_bo) + goto fallback; + + src_bo->pitch = tmp.devKind; + + for (n = 0; n < nbox; n++) { + memcpy_xor(src, ptr, tmp.drawable.bitsPerPixel, + stride, tmp.devKind, + box[n].x1 + src_dx, + box[n].y1 + src_dy, + box[n].x1 - 
extents.x1, + box[n].y1 - extents.y1, + box[n].x2 - box[n].x1, + box[n].y2 - box[n].y1, + and, or); + } + + n = sna->render.copy_boxes(sna, GXcopy, + &tmp, src_bo, -extents.x1, -extents.y1, + dst, dst_bo, dst_dx, dst_dy, + box, nbox); + + kgem_bo_destroy(&sna->kgem, src_bo); + + if (!n) + goto fallback; + + return; + } + + cmd = XY_SRC_COPY_BLT_CMD; + br13 = dst_bo->pitch; + if (kgem->gen >= 40 && dst_bo->tiling) { + cmd |= BLT_DST_TILED; + br13 >>= 2; + } + br13 |= 0xcc << 16; + switch (dst->drawable.bitsPerPixel) { + default: + case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + br13 |= 1 << 25; /* RGB8888 */ + case 16: br13 |= 1 << 24; /* RGB565 */ + case 8: break; + } + + kgem_set_mode(kgem, KGEM_BLT); + if (kgem->nexec + 2 > KGEM_EXEC_SIZE(kgem) || + kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem) || + !kgem_check_batch(kgem, 8) || + !kgem_check_bo_fenced(kgem, dst_bo, NULL)) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } + + do { + int nbox_this_time; + + nbox_this_time = nbox; + if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + /* Count the total number of bytes to be read and allocate a + * single buffer large enough. Or if it is very small, combine + * with other allocations. */ + offset = 0; + for (n = 0; n < nbox_this_time; n++) { + int height = box[n].y2 - box[n].y1; + int width = box[n].x2 - box[n].x1; + offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height; + } + + src_bo = kgem_create_buffer(kgem, offset, + KGEM_BUFFER_WRITE | (nbox ? 
KGEM_BUFFER_LAST : 0), + &ptr); + if (!src_bo) + break; + + offset = 0; + do { + int height = box->y2 - box->y1; + int width = box->x2 - box->x1; + int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3); + uint32_t *b; + + DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n", + __FUNCTION__, + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + width, height, + offset, pitch)); + + assert(box->x1 + src_dx >= 0); + assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride); + assert(box->y1 + src_dy >= 0); + + assert(box->x1 + dst_dx >= 0); + assert(box->y1 + dst_dy >= 0); + + memcpy_xor(src, (char *)ptr + offset, + dst->drawable.bitsPerPixel, + stride, pitch, + box->x1 + src_dx, box->y1 + src_dy, + 0, 0, + width, height, + and, or); + + b = kgem->batch + kgem->nbatch; + b[0] = cmd; + b[1] = br13; + b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); + b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = 0; + b[6] = pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + offset); + kgem->nbatch += 8; + + box++; + offset += pitch * height; + } while (--nbox_this_time); + assert(offset == src_bo->size); + + if (nbox) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } + + kgem_bo_destroy(kgem, src_bo); + } while (nbox); + + sna->blt_state.fill_bo = 0; +} + struct kgem_bo *sna_replace(struct sna *sna, PixmapPtr pixmap, struct kgem_bo *bo, diff --git a/src/sna/sna_reg.h b/src/sna/sna_reg.h index ff2ff3b7..551d64b0 100644 --- a/src/sna/sna_reg.h +++ b/src/sna/sna_reg.h @@ -55,6 +55,7 @@ #define XY_MONO_PAT ((0x2<<29)|(0x52<<22)|0x7) #define XY_MONO_SRC_COPY ((0x2<<29)|(0x54<<22)|(0x6)) #define XY_MONO_SRC_COPY_IMM ((0x2<<29)|(0x71<<22)) +#define XY_FULL_MONO_PATTERN_BLT 
((0x2<<29)|(0x57<<22)|0xa) #define XY_FULL_MONO_PATTERN_MONO_SRC_BLT ((0x2<<29)|(0x58<<22)|0xa) /* FLUSH commands */ |