author     Chris Wilson <chris@chris-wilson.co.uk>   2012-01-06 15:26:11 +0000
committer  Chris Wilson <chris@chris-wilson.co.uk>   2012-01-06 17:50:01 +0000
commit     9f1935bb4e894264053d94e53c99d5ad607700fb (patch)
tree       689fdc95dda28ba227cf26d9ee2347a303962bb8 /src
parent     141001df6c9c3485c500ed531a214c09b46c1d3b (diff)
sna: Support performing alpha-fixup on the source
By inlining the swizzling of the alpha-channel we can support BLT copies
from an alpha-less pixmap to an alpha-destination.

Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
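Below is a minimal sketch (illustrative, not part of the patch) of the per-pixel swizzle this commit wires into the BLT paths: for an x8r8g8b8 source copied to an a8r8g8b8 destination, each pixel only needs its alpha byte forced to opaque, i.e. the and/or mask pair (0xffffffff, 0xff000000) that the new memcpy_xor() consumes.

    #include <stdint.h>
    #include <stdio.h>

    /* Keep the bits selected by "and", then force the bits in "or"
     * (here the alpha byte) to 1 -- the core of the alpha fixup. */
    static uint32_t fixup_pixel(uint32_t and, uint32_t or, uint32_t px)
    {
            return (px & and) | or;
    }

    int main(void)
    {
            uint32_t src = 0x00336699; /* x8r8g8b8: alpha byte undefined */

            printf("%08x\n", fixup_pixel(0xffffffff, 0xff000000, src));
            /* prints ff336699: a valid, opaque a8r8g8b8 pixel */
            return 0;
    }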
Diffstat (limited to 'src')
-rw-r--r--  src/sna/blt.c      105
-rw-r--r--  src/sna/sna.h       13
-rw-r--r--  src/sna/sna_blt.c  356
-rw-r--r--  src/sna/sna_io.c   241
-rw-r--r--  src/sna/sna_reg.h    1
5 files changed, 682 insertions, 34 deletions
diff --git a/src/sna/blt.c b/src/sna/blt.c
index 7a77fa49..d28ad985 100644
--- a/src/sna/blt.c
+++ b/src/sna/blt.c
@@ -106,3 +106,108 @@ memcpy_blt(const void *src, void *dst, int bpp,
break;
}
}
+
+void
+memcpy_xor(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height,
+ uint32_t and, uint32_t or)
+{
+ uint8_t *src_bytes;
+ uint8_t *dst_bytes;
+ int i;
+
+ assert(width && height);
+ assert(bpp >= 8);
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), size=%dx%d, pitch=%d/%d, bpp=%d, and=%x, xor=%x\n",
+ __FUNCTION__,
+ src_x, src_y, dst_x, dst_y,
+ width, height,
+ src_stride, dst_stride,
+ bpp, and, or));
+
+ bpp /= 8;
+ src_bytes = (uint8_t *)src + src_stride * src_y + src_x * bpp;
+ dst_bytes = (uint8_t *)dst + dst_stride * dst_y + dst_x * bpp;
+
+ if (and == 0xffffffff) {
+ switch (bpp) {
+ case 1:
+ do {
+ for (i = 0; i < width; i++)
+ dst_bytes[i] = src_bytes[i] | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+
+ case 2:
+ do {
+ uint16_t *d = (uint16_t *)dst_bytes;
+ uint16_t *s = (uint16_t *)src_bytes;
+
+ for (i = 0; i < width; i++)
+ d[i] = s[i] | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+
+ case 4:
+ do {
+ uint32_t *d = (uint32_t *)dst_bytes;
+ uint32_t *s = (uint32_t *)src_bytes;
+
+ for (i = 0; i < width; i++)
+ d[i] = s[i] | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+ }
+ } else {
+ switch (bpp) {
+ case 1:
+ do {
+ for (i = 0; i < width; i++)
+ dst_bytes[i] = (src_bytes[i] & and) | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+
+ case 2:
+ do {
+ uint16_t *d = (uint16_t *)dst_bytes;
+ uint16_t *s = (uint16_t *)src_bytes;
+
+ for (i = 0; i < width; i++)
+ d[i] = (s[i] & and) | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+
+ case 4:
+ do {
+ uint32_t *d = (uint32_t *)dst_bytes;
+ uint32_t *s = (uint32_t *)src_bytes;
+
+ for (i = 0; i < width; i++)
+ d[i] = (s[i] & and) | or;
+
+ src_bytes += src_stride;
+ dst_bytes += dst_stride;
+ } while (--height);
+ break;
+ }
+ }
+}
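
A hypothetical caller for the helper above (assuming the sna.h declaration below is in scope): strides are in bytes, width/height in pixels, and with and == 0xffffffff the function takes the cheaper OR-only loop.

    #include <stdint.h>

    void example(void)
    {
            /* copy a 2x2 block of 32bpp pixels, forcing alpha opaque */
            uint32_t src[4] = { 0x00112233, 0x00445566,
                                0x00778899, 0x00aabbcc };
            uint32_t dst[4];

            memcpy_xor(src, dst, 32,
                       8, 8,       /* strides: 2 pixels * 4 bytes */
                       0, 0, 0, 0, /* src_x, src_y, dst_x, dst_y */
                       2, 2,       /* width, height */
                       0xffffffff, 0xff000000);
            /* dst == { 0xff112233, 0xff445566, 0xff778899, 0xffaabbcc } */
    }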
diff --git a/src/sna/sna.h b/src/sna/sna.h
index f16324e0..de4de5c8 100644
--- a/src/sna/sna.h
+++ b/src/sna/sna.h
@@ -685,6 +685,11 @@ void sna_write_boxes(struct sna *sna, PixmapPtr dst,
struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
const void *src, int stride, int16_t src_dx, int16_t src_dy,
const BoxRec *box, int n);
+void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst,
+ struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const void *src, int stride, int16_t src_dx, int16_t src_dy,
+ const BoxRec *box, int nbox,
+ uint32_t and, uint32_t or);
struct kgem_bo *sna_replace(struct sna *sna,
PixmapPtr pixmap,
@@ -713,6 +718,14 @@ memcpy_blt(const void *src, void *dst, int bpp,
int16_t dst_x, int16_t dst_y,
uint16_t width, uint16_t height);
+void
+memcpy_xor(const void *src, void *dst, int bpp,
+ int32_t src_stride, int32_t dst_stride,
+ int16_t src_x, int16_t src_y,
+ int16_t dst_x, int16_t dst_y,
+ uint16_t width, uint16_t height,
+ uint32_t and, uint32_t or);
+
#define SNA_CREATE_FB 0x10
#define SNA_CREATE_SCRATCH 0x11
#define SNA_CREATE_GLYPH 0x12
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index 5879e973..07771a90 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -307,6 +307,104 @@ static Bool sna_blt_copy_init(struct sna *sna,
return TRUE;
}
+static Bool sna_blt_alpha_fixup_init(struct sna *sna,
+ struct sna_blt_state *blt,
+ struct kgem_bo *src,
+ struct kgem_bo *dst,
+ int bpp, uint32_t alpha)
+{
+ struct kgem *kgem = &sna->kgem;
+
+ blt->bo[0] = src;
+ blt->bo[1] = dst;
+
+ blt->cmd = XY_FULL_MONO_PATTERN_BLT;
+ blt->pitch[0] = src->pitch;
+ if (kgem->gen >= 40 && src->tiling) {
+ blt->cmd |= BLT_SRC_TILED;
+ blt->pitch[0] >>= 2;
+ }
+ assert(blt->pitch[0] < MAXSHORT);
+
+ blt->pitch[1] = dst->pitch;
+ if (kgem->gen >= 40 && dst->tiling) {
+ blt->cmd |= BLT_DST_TILED;
+ blt->pitch[1] >>= 2;
+ }
+ assert(blt->pitch[1] < MAXSHORT);
+
+ blt->overwrites = 1;
+ blt->br13 = (0xfc << 16) | blt->pitch[1];
+ switch (bpp) {
+ default: assert(0);
+ case 32: blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ blt->br13 |= 1 << 25; /* RGB8888 */
+ case 16: blt->br13 |= 1 << 24; /* RGB565 */
+ case 8: break;
+ }
+ blt->pixel = alpha;
+
+ kgem_set_mode(kgem, KGEM_BLT);
+ if (!kgem_check_bo_fenced(kgem, src, dst, NULL)) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ sna->blt_state.fill_bo = 0;
+ return TRUE;
+}
+
+static void sna_blt_alpha_fixup_one(struct sna *sna,
+ const struct sna_blt_state *blt,
+ int src_x, int src_y,
+ int width, int height,
+ int dst_x, int dst_y)
+{
+ struct kgem *kgem = &sna->kgem;
+ uint32_t *b;
+
+ DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d)\n",
+ __FUNCTION__, src_x, src_y, dst_x, dst_y, width, height));
+
+ assert(src_x >= 0);
+ assert(src_y >= 0);
+ assert((src_y + height) * blt->bo[0]->pitch <= blt->bo[0]->size);
+ assert(dst_x >= 0);
+ assert(dst_y >= 0);
+ assert((dst_y + height) * blt->bo[1]->pitch <= blt->bo[1]->size);
+ assert(width > 0);
+ assert(height > 0);
+
+ if (!kgem_check_batch(kgem, 12) || !kgem_check_reloc(kgem, 2)) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ b = kgem->batch + kgem->nbatch;
+ b[0] = blt->cmd;
+ b[1] = blt->br13;
+ b[2] = (dst_y << 16) | dst_x;
+ b[3] = ((dst_y + height) << 16) | (dst_x + width);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4,
+ blt->bo[1],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = blt->pitch[0];
+ b[6] = (src_y << 16) | src_x;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7,
+ blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[8] = blt->pixel;
+ b[9] = blt->pixel;
+ b[10] = 0;
+ b[11] = 0;
+ kgem->nbatch += 12;
+}
+
static void sna_blt_copy_one(struct sna *sna,
const struct sna_blt_state *blt,
int src_x, int src_y,
@@ -930,9 +1028,90 @@ static void blt_composite_copy_boxes(struct sna *sna,
} while(--nbox);
}
+fastcall static void
+blt_composite_copy_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ int x1, x2, y1, y2;
+ int src_x, src_y;
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), size=(%d, %d)\n",
+ __FUNCTION__,
+ r->src.x, r->src.y,
+ r->dst.x, r->dst.y,
+ r->width, r->height));
+
+ /* XXX higher layer should have clipped? */
+
+ x1 = r->dst.x + op->dst.x;
+ y1 = r->dst.y + op->dst.y;
+ x2 = x1 + r->width;
+ y2 = y1 + r->height;
+
+ src_x = r->src.x - x1;
+ src_y = r->src.y - y1;
+
+ /* clip against dst */
+ if (x1 < 0)
+ x1 = 0;
+ if (y1 < 0)
+ y1 = 0;
+
+ if (x2 > op->dst.width)
+ x2 = op->dst.width;
+
+ if (y2 > op->dst.height)
+ y2 = op->dst.height;
+
+ DBG(("%s: box=(%d, %d), (%d, %d)\n", __FUNCTION__, x1, y1, x2, y2));
+
+ if (x2 <= x1 || y2 <= y1)
+ return;
+
+ sna_blt_alpha_fixup_one(sna, &op->u.blt,
+ x1 + src_x, y1 + src_y,
+ x2 - x1, y2 - y1,
+ x1, y1);
+}
+
+fastcall static void
+blt_composite_copy_box_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box)
+{
+ DBG(("%s: box (%d, %d), (%d, %d)\n",
+ __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+ sna_blt_alpha_fixup_one(sna, &op->u.blt,
+ box->x1 + op->u.blt.sx,
+ box->y1 + op->u.blt.sy,
+ box->x2 - box->x1,
+ box->y2 - box->y1,
+ box->x1 + op->dst.x,
+ box->y1 + op->dst.y);
+}
+
+static void
+blt_composite_copy_boxes_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+ do {
+ DBG(("%s: box (%d, %d), (%d, %d)\n",
+ __FUNCTION__, box->x1, box->y1, box->x2, box->y2));
+ sna_blt_alpha_fixup_one(sna, &op->u.blt,
+ box->x1 + op->u.blt.sx, box->y1 + op->u.blt.sy,
+ box->x2 - box->x1, box->y2 - box->y1,
+ box->x1 + op->dst.x, box->y1 + op->dst.y);
+ box++;
+ } while(--nbox);
+}
+
static Bool
prepare_blt_copy(struct sna *sna,
- struct sna_composite_op *op)
+ struct sna_composite_op *op,
+ uint32_t alpha_fixup)
{
PixmapPtr src = op->u.blt.src_pixmap;
struct sna_pixmap *priv = sna_pixmap(src);
@@ -947,19 +1126,32 @@ prepare_blt_copy(struct sna *sna,
DBG(("%s\n", __FUNCTION__));
- op->blt = blt_composite_copy;
- op->box = blt_composite_copy_box;
- op->boxes = blt_composite_copy_boxes;
if (sna->kgem.gen >= 60)
op->done = gen6_blt_copy_done;
else
op->done = blt_done;
- return sna_blt_copy_init(sna, &op->u.blt,
- priv->gpu_bo,
- op->dst.bo,
- src->drawable.bitsPerPixel,
- GXcopy);
+ if (alpha_fixup) {
+ op->blt = blt_composite_copy_with_alpha;
+ op->box = blt_composite_copy_box_with_alpha;
+ op->boxes = blt_composite_copy_boxes_with_alpha;
+
+ return sna_blt_alpha_fixup_init(sna, &op->u.blt,
+ priv->gpu_bo,
+ op->dst.bo,
+ src->drawable.bitsPerPixel,
+ alpha_fixup);
+ } else {
+ op->blt = blt_composite_copy;
+ op->box = blt_composite_copy_box;
+ op->boxes = blt_composite_copy_boxes;
+
+ return sna_blt_copy_init(sna, &op->u.blt,
+ priv->gpu_bo,
+ op->dst.bo,
+ src->drawable.bitsPerPixel,
+ GXcopy);
+ }
}
static void blt_vmap_done(struct sna *sna, const struct sna_composite_op *op)
@@ -1082,9 +1274,80 @@ static void blt_put_composite_boxes(struct sna *sna,
}
}
+fastcall static void
+blt_put_composite_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const struct sna_composite_rectangles *r)
+{
+ PixmapPtr dst = op->dst.pixmap;
+ PixmapPtr src = op->u.blt.src_pixmap;
+ struct sna_pixmap *dst_priv = sna_pixmap(dst);
+ int pitch = src->devKind;
+ char *data = src->devPrivate.ptr;
+
+ int16_t dst_x = r->dst.x + op->dst.x;
+ int16_t dst_y = r->dst.y + op->dst.y;
+ int16_t src_x = r->src.x + op->u.blt.sx;
+ int16_t src_y = r->src.y + op->u.blt.sy;
+ BoxRec box;
+
+ box.x1 = dst_x;
+ box.y1 = dst_y;
+ box.x2 = dst_x + r->width;
+ box.y2 = dst_y + r->height;
+
+ sna_write_boxes__xor(sna, dst,
+ dst_priv->gpu_bo, 0, 0,
+ data, pitch, src_x, src_y,
+ &box, 1,
+ 0xffffffff, op->u.blt.pixel);
+}
+
+fastcall static void
+blt_put_composite_box_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box)
+{
+ PixmapPtr src = op->u.blt.src_pixmap;
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d)\n", __FUNCTION__,
+ op->u.blt.sx, op->u.blt.sy,
+ op->dst.x, op->dst.y));
+
+ sna_write_boxes__xor(sna, op->dst.pixmap,
+ op->dst.bo, op->dst.x, op->dst.y,
+ src->devPrivate.ptr,
+ src->devKind,
+ op->u.blt.sx, op->u.blt.sy,
+ box, 1,
+ 0xffffffff, op->u.blt.pixel);
+}
+
+static void
+blt_put_composite_boxes_with_alpha(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int n)
+{
+ PixmapPtr src = op->u.blt.src_pixmap;
+
+ DBG(("%s: src=(%d, %d), dst=(%d, %d), [(%d, %d), (%d, %d) x %d]\n", __FUNCTION__,
+ op->u.blt.sx, op->u.blt.sy,
+ op->dst.x, op->dst.y,
+ box->x1, box->y1, box->x2, box->y2, n));
+
+ sna_write_boxes__xor(sna, op->dst.pixmap,
+ op->dst.bo, op->dst.x, op->dst.y,
+ src->devPrivate.ptr,
+ src->devKind,
+ op->u.blt.sx, op->u.blt.sy,
+ box, n,
+ 0xffffffff, op->u.blt.pixel);
+}
+
static Bool
prepare_blt_put(struct sna *sna,
- struct sna_composite_op *op)
+ struct sna_composite_op *op,
+ uint32_t alpha_fixup)
{
PixmapPtr src = op->u.blt.src_pixmap;
struct sna_pixmap *priv = sna_pixmap(src);
@@ -1105,26 +1368,43 @@ prepare_blt_put(struct sna *sna,
free_bo = src_bo;
}
if (src_bo) {
- op->blt = blt_composite_copy;
- op->box = blt_composite_copy_box;
- op->boxes = blt_composite_copy_boxes;
-
op->u.blt.src_pixmap = (void *)free_bo;
op->done = blt_vmap_done;
src_bo->pitch = src->devKind;
- if (!sna_blt_copy_init(sna, &op->u.blt,
- src_bo, op->dst.bo,
- op->dst.pixmap->drawable.bitsPerPixel,
- GXcopy))
- return FALSE;
+ if (alpha_fixup) {
+ op->blt = blt_composite_copy_with_alpha;
+ op->box = blt_composite_copy_box_with_alpha;
+ op->boxes = blt_composite_copy_boxes_with_alpha;
+
+ return sna_blt_alpha_fixup_init(sna, &op->u.blt,
+ src_bo, op->dst.bo,
+ op->dst.pixmap->drawable.bitsPerPixel,
+ alpha_fixup);
+ } else {
+ op->blt = blt_composite_copy;
+ op->box = blt_composite_copy_box;
+ op->boxes = blt_composite_copy_boxes;
+
+ return sna_blt_copy_init(sna, &op->u.blt,
+ src_bo, op->dst.bo,
+ op->dst.pixmap->drawable.bitsPerPixel,
+ GXcopy);
+ }
} else {
if (!sna_pixmap_move_to_cpu(src, MOVE_READ))
return FALSE;
- op->blt = blt_put_composite;
- op->box = blt_put_composite_box;
- op->boxes = blt_put_composite_boxes;
+ if (alpha_fixup) {
+ op->u.blt.pixel = alpha_fixup;
+ op->blt = blt_put_composite_with_alpha;
+ op->box = blt_put_composite_box_with_alpha;
+ op->boxes = blt_put_composite_boxes_with_alpha;
+ } else {
+ op->blt = blt_put_composite;
+ op->box = blt_put_composite_box;
+ op->boxes = blt_put_composite_boxes;
+ }
op->done = nop_done;
}
@@ -1209,6 +1489,13 @@ reduce_damage(struct sna_composite_op *op,
op->damage = NULL;
}
+#define alphaless(format) PICT_FORMAT(PICT_FORMAT_BPP(format), \
+ PICT_FORMAT_TYPE(format), \
+ 0, \
+ PICT_FORMAT_R(format), \
+ PICT_FORMAT_G(format), \
+ PICT_FORMAT_B(format))
+
Bool
sna_blt_composite(struct sna *sna,
uint32_t op,
@@ -1223,6 +1510,7 @@ sna_blt_composite(struct sna *sna,
PictFormat src_format = src->format;
struct sna_pixmap *priv;
int16_t tx, ty;
+ uint32_t alpha_fixup;
Bool ret;
#if DEBUG_NO_BLT || NO_BLT_COMPOSITE
@@ -1309,13 +1597,13 @@ sna_blt_composite(struct sna *sna,
return FALSE;
}
+ alpha_fixup = 0;
if (!(dst->format == src_format ||
- dst->format == PICT_FORMAT(PICT_FORMAT_BPP(src_format),
- PICT_FORMAT_TYPE(src_format),
- 0,
- PICT_FORMAT_R(src_format),
- PICT_FORMAT_G(src_format),
- PICT_FORMAT_B(src_format)))) {
+ dst->format == alphaless(src_format) ||
+ (alphaless(dst->format) == alphaless(src_format) &&
+ sna_get_pixel_from_rgba(&alpha_fixup,
+ 0, 0, 0, 0xffff,
+ dst->format)))) {
DBG(("%s: incompatible src/dst formats src=%08x, dst=%08x\n",
__FUNCTION__, (unsigned)src_format, dst->format));
return FALSE;
@@ -1349,18 +1637,18 @@ sna_blt_composite(struct sna *sna,
tmp->u.blt.sx = x - dst_x;
tmp->u.blt.sy = y - dst_y;
- DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d)\n",
+ DBG(("%s: blt dst offset (%d, %d), source offset (%d, %d), with alpha fixup? %x\n",
__FUNCTION__,
- tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy));
+ tmp->dst.x, tmp->dst.y, tmp->u.blt.sx, tmp->u.blt.sy, alpha_fixup));
if (has_gpu_area(blt->src_pixmap, x, y, width, height))
- ret = prepare_blt_copy(sna, tmp);
+ ret = prepare_blt_copy(sna, tmp, alpha_fixup);
else if (has_cpu_area(blt->src_pixmap, x, y, width, height))
- ret = prepare_blt_put(sna, tmp);
+ ret = prepare_blt_put(sna, tmp, alpha_fixup);
else if (sna_pixmap_move_to_gpu(blt->src_pixmap, MOVE_READ))
- ret = prepare_blt_copy(sna, tmp);
+ ret = prepare_blt_copy(sna, tmp, alpha_fixup);
else
- ret = prepare_blt_put(sna, tmp);
+ ret = prepare_blt_put(sna, tmp, alpha_fixup);
return ret;
}
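
The alphaless() macro above strips the alpha field from a PICT format id, so the new fixup path triggers exactly when source and destination agree on the colour channels and differ only in alpha (e.g. x8r8g8b8 -> a8r8g8b8). A standalone illustration, assuming the renderproto packing of PICT_FORMAT(bpp, type, a, r, g, b) into nibble fields rather than pulling in the X headers:

    #include <stdint.h>
    #include <stdio.h>

    #define PICT_FORMAT(bpp,type,a,r,g,b) \
            (((bpp) << 24) | ((type) << 16) | ((a) << 12) | \
             ((r) << 8) | ((g) << 4) | (b))
    #define PICT_FORMAT_BPP(f)   ((f) >> 24)
    #define PICT_FORMAT_TYPE(f)  (((f) >> 16) & 0xff)
    #define PICT_FORMAT_R(f)     (((f) >> 8) & 0x0f)
    #define PICT_FORMAT_G(f)     (((f) >> 4) & 0x0f)
    #define PICT_FORMAT_B(f)     ((f) & 0x0f)

    #define alphaless(f) PICT_FORMAT(PICT_FORMAT_BPP(f),      \
                                     PICT_FORMAT_TYPE(f), 0,  \
                                     PICT_FORMAT_R(f),        \
                                     PICT_FORMAT_G(f),        \
                                     PICT_FORMAT_B(f))

    int main(void)
    {
            uint32_t a8r8g8b8 = PICT_FORMAT(32, 2, 8, 8, 8, 8); /* 0x20028888 */
            uint32_t x8r8g8b8 = PICT_FORMAT(32, 2, 0, 8, 8, 8); /* 0x20020888 */

            /* same colour layout, alpha differs: the copy qualifies for
             * fixup, with 0xff000000 as the opaque-alpha pixel */
            printf("%d\n", alphaless(a8r8g8b8) == alphaless(x8r8g8b8));
            return 0;
    }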
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index c5e66f14..aef3f509 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -504,6 +504,247 @@ fallback:
sna->blt_state.fill_bo = 0;
}
+static void
+write_boxes_inplace__xor(struct kgem *kgem,
+ const void *src, int stride, int bpp, int16_t src_dx, int16_t src_dy,
+ struct kgem_bo *bo, int16_t dst_dx, int16_t dst_dy,
+ const BoxRec *box, int n,
+ uint32_t and, uint32_t or)
+{
+ int dst_pitch = bo->pitch;
+ int src_pitch = stride;
+ void *dst;
+
+ DBG(("%s x %d, tiling=%d\n", __FUNCTION__, n, bo->tiling));
+
+ kgem_bo_submit(kgem, bo);
+
+ dst = kgem_bo_map(kgem, bo, PROT_READ | PROT_WRITE);
+ if (dst == NULL)
+ return;
+
+ do {
+ DBG(("%s: (%d, %d) -> (%d, %d) x (%d, %d) [bpp=%d, src_pitch=%d, dst_pitch=%d]\n", __FUNCTION__,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ box->x2 - box->x1, box->y2 - box->y1,
+ bpp, src_pitch, dst_pitch));
+
+ memcpy_xor(src, dst, bpp,
+ src_pitch, dst_pitch,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ box->x2 - box->x1, box->y2 - box->y1,
+ and, or);
+ box++;
+ } while (--n);
+}
+
+void sna_write_boxes__xor(struct sna *sna, PixmapPtr dst,
+ struct kgem_bo *dst_bo, int16_t dst_dx, int16_t dst_dy,
+ const void *src, int stride, int16_t src_dx, int16_t src_dy,
+ const BoxRec *box, int nbox,
+ uint32_t and, uint32_t or)
+{
+ struct kgem *kgem = &sna->kgem;
+ struct kgem_bo *src_bo;
+ void *ptr;
+ int offset;
+ int n, cmd, br13;
+
+ DBG(("%s x %d\n", __FUNCTION__, nbox));
+
+ if (DEBUG_NO_IO || kgem->wedged ||
+ !kgem_bo_map_will_stall(kgem, dst_bo)) {
+fallback:
+ write_boxes_inplace__xor(kgem,
+ src, stride, dst->drawable.bitsPerPixel, src_dx, src_dy,
+ dst_bo, dst_dx, dst_dy,
+ box, nbox,
+ and, or);
+ return;
+ }
+
+ /* Try to avoid switching rings... */
+ if (dst_bo->tiling == I915_TILING_Y || kgem->ring == KGEM_RENDER) {
+ PixmapRec tmp;
+ BoxRec extents;
+
+ /* XXX Composite? Not that we should ever reach here! */
+
+ extents = box[0];
+ for (n = 1; n < nbox; n++) {
+ if (box[n].x1 < extents.x1)
+ extents.x1 = box[n].x1;
+ if (box[n].x2 > extents.x2)
+ extents.x2 = box[n].x2;
+
+ if (box[n].y1 < extents.y1)
+ extents.y1 = box[n].y1;
+ if (box[n].y2 > extents.y2)
+ extents.y2 = box[n].y2;
+ }
+
+ tmp.drawable.width = extents.x2 - extents.x1;
+ tmp.drawable.height = extents.y2 - extents.y1;
+ tmp.drawable.depth = dst->drawable.depth;
+ tmp.drawable.bitsPerPixel = dst->drawable.bitsPerPixel;
+ tmp.devPrivate.ptr = NULL;
+
+ assert(tmp.drawable.width);
+ assert(tmp.drawable.height);
+
+ tmp.devKind = tmp.drawable.width * tmp.drawable.bitsPerPixel / 8;
+ tmp.devKind = ALIGN(tmp.devKind, 4);
+
+ src_bo = kgem_create_buffer(kgem,
+ tmp.drawable.height * tmp.devKind,
+ KGEM_BUFFER_WRITE,
+ &ptr);
+ if (!src_bo)
+ goto fallback;
+
+ src_bo->pitch = tmp.devKind;
+
+ for (n = 0; n < nbox; n++) {
+ memcpy_xor(src, ptr, tmp.drawable.bitsPerPixel,
+ stride, tmp.devKind,
+ box[n].x1 + src_dx,
+ box[n].y1 + src_dy,
+ box[n].x1 - extents.x1,
+ box[n].y1 - extents.y1,
+ box[n].x2 - box[n].x1,
+ box[n].y2 - box[n].y1,
+ and, or);
+ }
+
+ n = sna->render.copy_boxes(sna, GXcopy,
+ &tmp, src_bo, -extents.x1, -extents.y1,
+ dst, dst_bo, dst_dx, dst_dy,
+ box, nbox);
+
+ kgem_bo_destroy(&sna->kgem, src_bo);
+
+ if (!n)
+ goto fallback;
+
+ return;
+ }
+
+ cmd = XY_SRC_COPY_BLT_CMD;
+ br13 = dst_bo->pitch;
+ if (kgem->gen >= 40 && dst_bo->tiling) {
+ cmd |= BLT_DST_TILED;
+ br13 >>= 2;
+ }
+ br13 |= 0xcc << 16;
+ switch (dst->drawable.bitsPerPixel) {
+ default:
+ case 32: cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ br13 |= 1 << 25; /* RGB8888 */
+ case 16: br13 |= 1 << 24; /* RGB565 */
+ case 8: break;
+ }
+
+ kgem_set_mode(kgem, KGEM_BLT);
+ if (kgem->nexec + 2 > KGEM_EXEC_SIZE(kgem) ||
+ kgem->nreloc + 2 > KGEM_RELOC_SIZE(kgem) ||
+ !kgem_check_batch(kgem, 8) ||
+ !kgem_check_bo_fenced(kgem, dst_bo, NULL)) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ /* Count the total number of bytes to be read and allocate a
+ * single buffer large enough. Or if it is very small, combine
+ * with other allocations. */
+ offset = 0;
+ for (n = 0; n < nbox_this_time; n++) {
+ int height = box[n].y2 - box[n].y1;
+ int width = box[n].x2 - box[n].x1;
+ offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
+ }
+
+ src_bo = kgem_create_buffer(kgem, offset,
+ KGEM_BUFFER_WRITE | (nbox ? KGEM_BUFFER_LAST : 0),
+ &ptr);
+ if (!src_bo)
+ break;
+
+ offset = 0;
+ do {
+ int height = box->y2 - box->y1;
+ int width = box->x2 - box->x1;
+ int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
+ uint32_t *b;
+
+ DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
+ __FUNCTION__,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ width, height,
+ offset, pitch));
+
+ assert(box->x1 + src_dx >= 0);
+ assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ memcpy_xor(src, (char *)ptr + offset,
+ dst->drawable.bitsPerPixel,
+ stride, pitch,
+ box->x1 + src_dx, box->y1 + src_dy,
+ 0, 0,
+ width, height,
+ and, or);
+
+ b = kgem->batch + kgem->nbatch;
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
+ b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = 0;
+ b[6] = pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ offset);
+ kgem->nbatch += 8;
+
+ box++;
+ offset += pitch * height;
+ } while (--nbox_this_time);
+ assert(offset == src_bo->size);
+
+ if (nbox) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ kgem_bo_destroy(kgem, src_bo);
+ } while (nbox);
+
+ sna->blt_state.fill_bo = 0;
+}
+
struct kgem_bo *sna_replace(struct sna *sna,
PixmapPtr pixmap,
struct kgem_bo *bo,
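
When a CPU map of the destination would stall, sna_write_boxes__xor() above packs the fixed-up boxes back to back into one staging buffer and blits each one out, giving every box its own 4-byte-aligned pitch. A sketch of the sizing arithmetic, assuming PITCH(w, cpp) expands to ALIGN(w * cpp, 4) as used in sna_io.c:

    #include <stdint.h>

    #define ALIGN(v, a)    (((v) + (a) - 1) & ~((a) - 1))
    #define PITCH(w, cpp)  ALIGN((w) * (cpp), 4)

    struct box { int16_t x1, y1, x2, y2; }; /* stand-in for BoxRec */

    /* Total bytes needed to pack nbox boxes contiguously; mirrors the
     * sizing loop before kgem_create_buffer() in sna_write_boxes__xor(). */
    static int upload_size(const struct box *box, int nbox, int cpp)
    {
            int offset = 0, n;

            for (n = 0; n < nbox; n++)
                    offset += PITCH(box[n].x2 - box[n].x1, cpp) *
                              (box[n].y2 - box[n].y1);
            return offset;
    }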
diff --git a/src/sna/sna_reg.h b/src/sna/sna_reg.h
index ff2ff3b7..551d64b0 100644
--- a/src/sna/sna_reg.h
+++ b/src/sna/sna_reg.h
@@ -55,6 +55,7 @@
#define XY_MONO_PAT ((0x2<<29)|(0x52<<22)|0x7)
#define XY_MONO_SRC_COPY ((0x2<<29)|(0x54<<22)|(0x6))
#define XY_MONO_SRC_COPY_IMM ((0x2<<29)|(0x71<<22))
+#define XY_FULL_MONO_PATTERN_BLT ((0x2<<29)|(0x57<<22)|0xa)
#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT ((0x2<<29)|(0x58<<22)|0xa)
/* FLUSH commands */
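
The new opcode follows the usual layout of the 2D (client 0x2) commands, with the low byte holding the DWord length minus two. Decoding it (an illustrative check, not from the patch) yields 12 DWords, which matches the kgem->nbatch += 12 emission in sna_blt_alpha_fixup_one():

    #include <stdint.h>
    #include <stdio.h>

    int main(void)
    {
            uint32_t cmd = (0x2 << 29) | (0x57 << 22) | 0xa;

            printf("client=%u opcode=0x%02x dwords=%u\n",
                   cmd >> 29, (cmd >> 22) & 0x7f, (cmd & 0xff) + 2);
            /* client=2 opcode=0x57 dwords=12 */
            return 0;
    }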