-rw-r--r--  src/sna/kgem.c          |  128
-rw-r--r--  src/sna/kgem.h          |    7
-rw-r--r--  src/sna/sna_accel.c     | 1651
-rw-r--r--  src/sna/sna_blt.c       |  989
-rw-r--r--  src/sna/sna_io.c        |  679
-rw-r--r--  src/sna/sna_reg.h       |   32
-rw-r--r--  src/uxa/intel_driver.c  |    3
7 files changed, 2445 insertions, 1044 deletions
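
The change repeated throughout this patch is the move to 64-bit blitter addresses on gen8 (written 0100 in the driver's octal generation numbering): kgem_add_reloc64() returns a 64-bit presumed address, so each relocation occupies two batch dwords, every XY_* command grows by two dwords, its DW0 length field grows by two, and the kgem_check_batch() reservations grow to match. A minimal sketch of the resulting dual emission path; the kgem_add_reloc()/kgem_add_reloc64() calls, command and domain flags mirror the patch below, but emit_mono_src_copy() and its argument list are hypothetical and the sna/kgem headers are assumed for the types:

    static void emit_mono_src_copy(struct kgem *kgem, uint32_t br00, uint32_t br13,
                                   const BoxRec *box, struct kgem_bo *dst_bo,
                                   struct kgem_bo *src_bo, uint32_t bg, uint32_t fg)
    {
            uint32_t *b = kgem->batch + kgem->nbatch;

            if (kgem->gen >= 0100) {
                    /* gen8+: two dwords per address, 10 dwords in total */
                    b[0] = XY_MONO_SRC_COPY | br00 | 8;
                    b[1] = br13;
                    b[2] = box->y1 << 16 | box->x1;
                    b[3] = box->y2 << 16 | box->x2;
                    *(uint64_t *)(b+4) =
                            kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
                                             I915_GEM_DOMAIN_RENDER << 16 |
                                             I915_GEM_DOMAIN_RENDER |
                                             KGEM_RELOC_FENCED, 0);
                    *(uint64_t *)(b+6) =
                            kgem_add_reloc64(kgem, kgem->nbatch + 6, src_bo,
                                             I915_GEM_DOMAIN_RENDER << 16 |
                                             KGEM_RELOC_FENCED, 0);
                    b[8] = bg;
                    b[9] = fg;
                    kgem->nbatch += 10;
            } else {
                    /* pre-gen8: one dword per address, 8 dwords in total */
                    b[0] = XY_MONO_SRC_COPY | br00 | 6;
                    b[1] = br13;
                    b[2] = box->y1 << 16 | box->x1;
                    b[3] = box->y2 << 16 | box->x2;
                    b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
                                          I915_GEM_DOMAIN_RENDER << 16 |
                                          I915_GEM_DOMAIN_RENDER |
                                          KGEM_RELOC_FENCED, 0);
                    b[5] = kgem_add_reloc(kgem, kgem->nbatch + 5, src_bo,
                                          I915_GEM_DOMAIN_RENDER << 16 |
                                          KGEM_RELOC_FENCED, 0);
                    b[6] = bg;
                    b[7] = fg;
                    kgem->nbatch += 8;
            }
    }
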
diff --git a/src/sna/kgem.c b/src/sna/kgem.c
index 2710557d..4d225d1f 100644
--- a/src/sna/kgem.c
+++ b/src/sna/kgem.c
@@ -4982,6 +4982,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n",
__FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain));
+ assert(kgem->gen < 0100);
assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
index = kgem->nreloc++;
@@ -5053,6 +5054,77 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
return delta;
}
+uint64_t kgem_add_reloc64(struct kgem *kgem,
+ uint32_t pos,
+ struct kgem_bo *bo,
+ uint32_t read_write_domain,
+ uint64_t delta)
+{
+ int index;
+
+ DBG(("%s: handle=%d, pos=%d, delta=%ld, domains=%08x\n",
+ __FUNCTION__, bo ? bo->handle : 0, pos, (long)delta, read_write_domain));
+
+ assert(kgem->gen >= 0100);
+ assert((read_write_domain & 0x7fff) == 0 || bo != NULL);
+
+ index = kgem->nreloc++;
+ assert(index < ARRAY_SIZE(kgem->reloc));
+ kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]);
+ if (bo) {
+ assert(kgem->mode != KGEM_NONE);
+ assert(bo->refcnt);
+ while (bo->proxy) {
+ DBG(("%s: adding proxy [delta=%d] for handle=%d\n",
+ __FUNCTION__, bo->delta, bo->handle));
+ delta += bo->delta;
+ assert(bo->handle == bo->proxy->handle);
+ /* need to release the cache upon batch submit */
+ if (bo->exec == NULL) {
+ list_move_tail(&bo->request,
+ &kgem->next_request->buffers);
+ bo->rq = MAKE_REQUEST(kgem->next_request,
+ kgem->ring);
+ bo->exec = &_kgem_dummy_exec;
+ bo->domain = DOMAIN_GPU;
+ }
+
+ if (read_write_domain & 0x7fff && !bo->gpu_dirty)
+ __kgem_bo_mark_dirty(bo);
+
+ bo = bo->proxy;
+ assert(bo->refcnt);
+ }
+ assert(bo->refcnt);
+
+ if (bo->exec == NULL)
+ kgem_add_bo(kgem, bo);
+ assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring));
+ assert(RQ_RING(bo->rq) == kgem->ring);
+
+ kgem->reloc[index].delta = delta;
+ kgem->reloc[index].target_handle = bo->target_handle;
+ kgem->reloc[index].presumed_offset = bo->presumed_offset;
+
+ if (read_write_domain & 0x7fff && !bo->gpu_dirty) {
+ assert(!bo->snoop || kgem->can_blt_cpu);
+ __kgem_bo_mark_dirty(bo);
+ }
+
+ delta += bo->presumed_offset;
+ } else {
+ kgem->reloc[index].delta = delta;
+ kgem->reloc[index].target_handle = ~0U;
+ kgem->reloc[index].presumed_offset = 0;
+ if (kgem->nreloc__self < 256)
+ kgem->reloc__self[kgem->nreloc__self++] = index;
+ }
+ kgem->reloc[index].read_domains = read_write_domain >> 16;
+ kgem->reloc[index].write_domain = read_write_domain & 0x7fff;
+
+ return delta;
+}
+
static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket)
{
int i, j;
@@ -6338,6 +6410,7 @@ kgem_replace_bo(struct kgem *kgem,
* we only attempt to allocate a linear bo.
*/
assert(src->tiling == I915_TILING_NONE);
+ assert(kgem_bo_can_blt(kgem, src));
size = height * pitch;
size = NUM_PAGES(size);
@@ -6363,7 +6436,7 @@ kgem_replace_bo(struct kgem *kgem,
dst->refcnt = 1;
kgem_set_mode(kgem, KGEM_BLT, dst);
- if (!kgem_check_batch(kgem, 8) ||
+ if (!kgem_check_batch(kgem, 10) ||
!kgem_check_reloc(kgem, 2) ||
!kgem_check_many_bo_fenced(kgem, src, dst, NULL)) {
kgem_submit(kgem);
@@ -6392,22 +6465,43 @@ kgem_replace_bo(struct kgem *kgem,
}
b = kgem->batch + kgem->nbatch;
- b[0] = br00;
- b[1] = br13;
- b[2] = 0;
- b[3] = height << 16 | width;
- b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = 0;
- b[6] = pitch;
- b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- kgem->nbatch += 8;
+ if (kgem->gen >= 0100) {
+ b[0] = br00 | 8;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = height << 16 | width;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, dst,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = 0;
+ b[7] = pitch;
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 8, src,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 10;
+ } else {
+ b[0] = br00 | 6;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = height << 16 | width;
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = 0;
+ b[6] = pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8;
+ }
return dst;
}
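
For reference, the small constants OR'd into b[0] in these emitters follow the blitter convention (assumed here, not stated in the patch) that the DW0 length field holds the total command length in dwords minus two; that is why | 6 becomes | 8, 5 + src_stride becomes 6 + src_stride in the immediate variants, and each kgem_check_batch() reservation grows by the same two dwords. A trivial sketch of that bookkeeping, with hypothetical names:

    /* Assumed DW0 convention for the XY_* blitter commands in this patch:
     * bits 7:0 = (total dwords in the command) - 2. */
    #define BLT_CMD_LEN(total_dwords) ((total_dwords) - 2)

    /* XY_MONO_SRC_COPY: 8 dwords with 32-bit addresses, 10 with 64-bit ones. */
    #define MONO_SRC_COPY_LEN_PRE_GEN8 BLT_CMD_LEN(8)  /* == 6 */
    #define MONO_SRC_COPY_LEN_GEN8     BLT_CMD_LEN(10) /* == 8 */
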
diff --git a/src/sna/kgem.h b/src/sna/kgem.h
index 7da359e0..b6d91561 100644
--- a/src/sna/kgem.h
+++ b/src/sna/kgem.h
@@ -65,11 +65,11 @@ struct kgem_bo {
uint16_t offset;
} binding;
+ uint64_t presumed_offset;
uint32_t unique_id;
uint32_t refcnt;
uint32_t handle;
uint32_t target_handle;
- uint32_t presumed_offset;
uint32_t delta;
union {
struct {
@@ -446,6 +446,11 @@ uint32_t kgem_add_reloc(struct kgem *kgem,
struct kgem_bo *bo,
uint32_t read_write_domains,
uint32_t delta);
+uint64_t kgem_add_reloc64(struct kgem *kgem,
+ uint32_t pos,
+ struct kgem_bo *bo,
+ uint32_t read_write_domains,
+ uint64_t delta);
void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo);
void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo);
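
In kgem.h the bo's presumed_offset widens to uint64_t (moving up in the struct, presumably for alignment) and kgem_add_reloc64() is declared next to the 32-bit helper. The *(uint64_t *)(b+n) stores in the callers then rely on the little-endian hosts this driver targets to land the low dword first; an equivalent, more explicit form of that store, with a hypothetical batch_emit64() helper:

    #include <stdint.h>

    /* Hypothetical helper: spell out how the 64-bit presumed address returned
     * by kgem_add_reloc64() occupies two consecutive batch dwords. */
    static inline void batch_emit64(uint32_t *b, uint64_t addr)
    {
            b[0] = (uint32_t)addr;          /* DW n:   address bits 31:0  */
            b[1] = (uint32_t)(addr >> 32);  /* DW n+1: address bits 63:32 */
    }

A caller's *(uint64_t *)(b+4) = kgem_add_reloc64(...) is then shorthand for batch_emit64(b + 4, kgem_add_reloc64(...)).
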
diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c
index ffb61170..02caf4b9 100644
--- a/src/sna/sna_accel.c
+++ b/src/sna/sna_accel.c
@@ -4509,7 +4509,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
struct kgem_bo *upload;
void *ptr;
- if (!kgem_check_batch(&sna->kgem, 8) ||
+ if (!kgem_check_batch(&sna->kgem, 10) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
kgem_submit(&sna->kgem);
@@ -4548,32 +4548,61 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
} while (--bh);
assert(sna->kgem.mode == KGEM_BLT);
- b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_MONO_SRC_COPY | 3 << 20;
- b[0] |= ((box->x1 - x) & 7) << 17;
- b[1] = bo->pitch;
- if (sna->kgem.gen >= 040 && bo->tiling) {
- b[0] |= BLT_DST_TILED;
- b[1] >>= 2;
- }
- b[1] |= blt_depth(drawable->depth) << 24;
- b[1] |= rop << 16;
- b[2] = box->y1 << 16 | box->x1;
- b[3] = box->y2 << 16 | box->x2;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
- upload,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- b[6] = gc->bgPixel;
- b[7] = gc->fgPixel;
+ if (sna->kgem.gen >= 0100) {
+ b = sna->kgem.batch + sna->kgem.nbatch;
+ b[0] = XY_MONO_SRC_COPY | 3 << 20 | 8;
+ b[0] |= ((box->x1 - x) & 7) << 17;
+ b[1] = bo->pitch;
+ if (bo->tiling) {
+ b[0] |= BLT_DST_TILED;
+ b[1] >>= 2;
+ }
+ b[1] |= blt_depth(drawable->depth) << 24;
+ b[1] |= rop << 16;
+ b[2] = box->y1 << 16 | box->x1;
+ b[3] = box->y2 << 16 | box->x2;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ *(uint64_t *)(b+6) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[8] = gc->bgPixel;
+ b[9] = gc->fgPixel;
+
+ sna->kgem.nbatch += 10;
+ } else {
+ b = sna->kgem.batch + sna->kgem.nbatch;
+ b[0] = XY_MONO_SRC_COPY | 3 << 20 | 6;
+ b[0] |= ((box->x1 - x) & 7) << 17;
+ b[1] = bo->pitch;
+ if (sna->kgem.gen >= 040 && bo->tiling) {
+ b[0] |= BLT_DST_TILED;
+ b[1] >>= 2;
+ }
+ b[1] |= blt_depth(drawable->depth) << 24;
+ b[1] |= rop << 16;
+ b[2] = box->y1 << 16 | box->x1;
+ b[3] = box->y2 << 16 | box->x2;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
- sna->kgem.nbatch += 8;
+ sna->kgem.nbatch += 8;
+ }
sigtrap_put();
}
kgem_bo_destroy(&sna->kgem, upload);
@@ -4646,7 +4675,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
struct kgem_bo *upload;
void *ptr;
- if (!kgem_check_batch(&sna->kgem, 12) ||
+ if (!kgem_check_batch(&sna->kgem, 14) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
kgem_submit(&sna->kgem);
@@ -4683,38 +4712,70 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region,
} while (--bh);
assert(sna->kgem.mode == KGEM_BLT);
- b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_FULL_MONO_PATTERN_MONO_SRC_BLT | 3 << 20;
- b[0] |= ((box->x1 - x) & 7) << 17;
- b[1] = bo->pitch;
- if (sna->kgem.gen >= 040 && bo->tiling) {
- b[0] |= BLT_DST_TILED;
- b[1] >>= 2;
+ if (sna->kgem.gen >= 0100) {
+ assert(sna->kgem.mode == KGEM_BLT);
+ b = sna->kgem.batch + sna->kgem.nbatch;
+ b[0] = XY_FULL_MONO_PATTERN_MONO_SRC_BLT | 3 << 20 | 12;
+ b[0] |= ((box->x1 - x) & 7) << 17;
+ b[1] = bo->pitch;
+ if (bo->tiling) {
+ b[0] |= BLT_DST_TILED;
+ b[1] >>= 2;
+ }
+ b[1] |= 1 << 31; /* solid pattern */
+ b[1] |= blt_depth(drawable->depth) << 24;
+ b[1] |= 0xce << 16; /* S or (D and !P) */
+ b[2] = box->y1 << 16 | box->x1;
+ b[3] = box->y2 << 16 | box->x2;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ *(uint64_t *)(b+6) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[8] = 0;
+ b[9] = i;
+ b[10] = i;
+ b[11] = i;
+ b[12] = -1;
+ b[13] = -1;
+ sna->kgem.nbatch += 14;
+ } else {
+ b = sna->kgem.batch + sna->kgem.nbatch;
+ b[0] = XY_FULL_MONO_PATTERN_MONO_SRC_BLT | 3 << 20 | 10;
+ b[0] |= ((box->x1 - x) & 7) << 17;
+ b[1] = bo->pitch;
+ if (sna->kgem.gen >= 040 && bo->tiling) {
+ b[0] |= BLT_DST_TILED;
+ b[1] >>= 2;
+ }
+ b[1] |= 1 << 31; /* solid pattern */
+ b[1] |= blt_depth(drawable->depth) << 24;
+ b[1] |= 0xce << 16; /* S or (D and !P) */
+ b[2] = box->y1 << 16 | box->x1;
+ b[3] = box->y2 << 16 | box->x2;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = 0;
+ b[7] = i;
+ b[8] = i;
+ b[9] = i;
+ b[10] = -1;
+ b[11] = -1;
+ sna->kgem.nbatch += 12;
}
- b[1] |= 1 << 31; /* solid pattern */
- b[1] |= blt_depth(drawable->depth) << 24;
- b[1] |= 0xce << 16; /* S or (D and !P) */
- b[2] = box->y1 << 16 | box->x1;
- b[3] = box->y2 << 16 | box->x2;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
- bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
- upload,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- b[6] = 0;
- b[7] = i;
- b[8] = i;
- b[9] = i;
- b[10] = -1;
- b[11] = -1;
-
- sna->kgem.nbatch += 12;
sigtrap_put();
}
kgem_bo_destroy(&sna->kgem, upload);
@@ -7367,7 +7428,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
if (src_stride <= 128) {
src_stride = ALIGN(src_stride, 8) / 4;
assert(src_stride <= 32);
- if (!kgem_check_batch(&sna->kgem, 7+src_stride) ||
+ if (!kgem_check_batch(&sna->kgem, 8+src_stride) ||
!kgem_check_bo_fenced(&sna->kgem, arg->bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
kgem_submit(&sna->kgem);
@@ -7377,24 +7438,43 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
}
assert(sna->kgem.mode == KGEM_BLT);
- b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00;
- b[0] |= ((box->x1 + sx) & 7) << 17;
- b[1] = br13;
- b[2] = (box->y1 + dy) << 16 | (box->x1 + dx);
- b[3] = (box->y2 + dy) << 16 | (box->x2 + dx);
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
- arg->bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
+ if (sna->kgem.gen >= 0100) {
+ b = sna->kgem.batch + sna->kgem.nbatch;
+ b[0] = XY_MONO_SRC_COPY_IMM | (6 + src_stride) | br00;
+ b[0] |= ((box->x1 + sx) & 7) << 17;
+ b[1] = br13;
+ b[2] = (box->y1 + dy) << 16 | (box->x1 + dx);
+ b[3] = (box->y2 + dy) << 16 | (box->x2 + dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, arg->bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
- sna->kgem.nbatch += 7 + src_stride;
+ dst = (uint8_t *)&b[8];
+ sna->kgem.nbatch += 8 + src_stride;
+ } else {
+ b = sna->kgem.batch + sna->kgem.nbatch;
+ b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00;
+ b[0] |= ((box->x1 + sx) & 7) << 17;
+ b[1] = br13;
+ b[2] = (box->y1 + dy) << 16 | (box->x1 + dx);
+ b[3] = (box->y2 + dy) << 16 | (box->x2 + dx);
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, arg->bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
+
+ dst = (uint8_t *)&b[7];
+ sna->kgem.nbatch += 7 + src_stride;
+ }
- dst = (uint8_t *)&b[7];
src_stride = bitmap->devKind;
src = bitmap->devPrivate.ptr;
src += (box->y1 + sy) * src_stride + bx1/8;
@@ -7414,7 +7494,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
struct kgem_bo *upload;
void *ptr;
- if (!kgem_check_batch(&sna->kgem, 8) ||
+ if (!kgem_check_batch(&sna->kgem, 10) ||
!kgem_check_bo_fenced(&sna->kgem, arg->bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
kgem_submit(&sna->kgem);
@@ -7433,27 +7513,47 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc,
if (sigtrap_get() == 0) {
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_MONO_SRC_COPY | br00 | 8;
+ b[0] |= ((box->x1 + sx) & 7) << 17;
+ b[1] = br13;
+ b[2] = (box->y1 + dy) << 16 | (box->x1 + dx);
+ b[3] = (box->y2 + dy) << 16 | (box->x2 + dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, arg->bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ *(uint64_t *)(b+6) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[8] = gc->bgPixel;
+ b[9] = gc->fgPixel;
+
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_MONO_SRC_COPY | br00 | 6;
+ b[0] |= ((box->x1 + sx) & 7) << 17;
+ b[1] = br13;
+ b[2] = (box->y1 + dy) << 16 | (box->x1 + dx);
+ b[3] = (box->y2 + dy) << 16 | (box->x2 + dx);
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, arg->bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
- b[0] = XY_MONO_SRC_COPY | br00;
- b[0] |= ((box->x1 + sx) & 7) << 17;
- b[1] = br13;
- b[2] = (box->y1 + dy) << 16 | (box->x1 + dx);
- b[3] = (box->y2 + dy) << 16 | (box->x2 + dx);
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
- arg->bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
- upload,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- b[6] = gc->bgPixel;
- b[7] = gc->fgPixel;
-
- sna->kgem.nbatch += 8;
+ sna->kgem.nbatch += 8;
+ }
dst = ptr;
src_stride = bitmap->devKind;
@@ -7542,7 +7642,7 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc,
box->x2, box->y2,
sx, sy, bx1, bx2));
- if (!kgem_check_batch(&sna->kgem, 8) ||
+ if (!kgem_check_batch(&sna->kgem, 10) ||
!kgem_check_bo_fenced(&sna->kgem, arg->bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
kgem_submit(&sna->kgem);
@@ -7667,25 +7767,45 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = br00 | ((box->x1 + sx) & 7) << 17;
- b[1] = br13;
- b[2] = (box->y1 + dy) << 16 | (box->x1 + dx);
- b[3] = (box->y2 + dy) << 16 | (box->x2 + dx);
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
- arg->bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
- upload,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- b[6] = gc->bgPixel;
- b[7] = gc->fgPixel;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = br00 | ((box->x1 + sx) & 7) << 17 | 8;
+ b[1] = br13;
+ b[2] = (box->y1 + dy) << 16 | (box->x1 + dx);
+ b[3] = (box->y2 + dy) << 16 | (box->x2 + dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, arg->bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ *(uint64_t *)(b+6) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[8] = gc->bgPixel;
+ b[9] = gc->fgPixel;
+
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = br00 | ((box->x1 + sx) & 7) << 17 | 6;
+ b[1] = br13;
+ b[2] = (box->y1 + dy) << 16 | (box->x1 + dx);
+ b[3] = (box->y2 + dy) << 16 | (box->x2 + dx);
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, arg->bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
- sna->kgem.nbatch += 8;
+ sna->kgem.nbatch += 8;
+ }
sigtrap_put();
}
kgem_bo_destroy(&sna->kgem, upload);
@@ -11174,7 +11294,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable,
__FUNCTION__, n, r->x, r->y, r->width, r->height, clipped));
kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
- if (!kgem_check_batch(&sna->kgem, 8+2*3) ||
+ if (!kgem_check_batch(&sna->kgem, 10+2*3) ||
!kgem_check_reloc(&sna->kgem, 2) ||
!kgem_check_bo_fenced(&sna->kgem, bo)) {
kgem_submit(&sna->kgem);
@@ -11218,41 +11338,81 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_PAT_BLT | tx << 12 | ty << 8 | 3 << 20 | (br00 & BLT_DST_TILED);
- b[1] = br13;
- b[2] = (r->y + dy) << 16 | (r->x + dx);
- b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx);
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, tile_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- sna->kgem.nbatch += 6;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_PAT_BLT | tx << 12 | ty << 8 | 3 << 20 | (br00 & BLT_DST_TILED) | 6;
+ b[1] = br13;
+ b[2] = (r->y + dy) << 16 | (r->x + dx);
+ b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ *(uint64_t *)(b+6) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, tile_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ sna->kgem.nbatch += 8;
+ } else {
+ b[0] = XY_PAT_BLT | tx << 12 | ty << 8 | 3 << 20 | (br00 & BLT_DST_TILED) | 4;
+ b[1] = br13;
+ b[2] = (r->y + dy) << 16 | (r->x + dx);
+ b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx);
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, tile_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ sna->kgem.nbatch += 6;
+ }
} else do {
int n_this_time;
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_SETUP_BLT | 3 << 20;
- b[1] = br13;
- b[2] = 0;
- b[3] = 0;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
- b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- sna->kgem.nbatch += 8;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_SETUP_BLT | 3 << 20 | 8;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 8, tile_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_BLT | 3 << 20 | 6;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
+ b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ sna->kgem.nbatch += 8;
+ }
n_this_time = n;
if (3*n_this_time > sna->kgem.surface - sna->kgem.nbatch - KGEM_BATCH_RESERVED)
@@ -11301,22 +11461,43 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_SETUP_BLT | 3 << 20;
- b[1] = br13;
- b[2] = 0;
- b[3] = 0;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
- b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- sna->kgem.nbatch += 8;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_SETUP_BLT | 3 << 20 | 8;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 8, tile_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_BLT | 3 << 20 | 6;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
+ b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ sna->kgem.nbatch += 8;
+ }
if (clip.data == NULL) {
const BoxRec *c = &clip.extents;
@@ -11339,22 +11520,43 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_SETUP_BLT | 3 << 20;
- b[1] = br13;
- b[2] = 0;
- b[3] = 0;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
- b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- sna->kgem.nbatch += 8;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_SETUP_BLT | 3 << 20 | 8;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 8, tile_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_BLT | 3 << 20 | 6;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
+ b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ sna->kgem.nbatch += 8;
+ }
}
assert(box.x1 + dx >= 0);
@@ -11412,22 +11614,43 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_SETUP_BLT | 3 << 20;
- b[1] = br13;
- b[2] = 0;
- b[3] = 0;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
- b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- sna->kgem.nbatch += 8;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_SETUP_BLT | 3 << 20 | 8;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 8, tile_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_BLT | 3 << 20 | 6;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
+ b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ sna->kgem.nbatch += 8;
+ }
}
assert(bb.x1 + dx >= 0);
@@ -11840,7 +12063,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
}
kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
- if (!kgem_check_batch(&sna->kgem, 9 + 2*3) ||
+ if (!kgem_check_batch(&sna->kgem, 10 + 2*3) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
kgem_submit(&sna->kgem);
@@ -11860,39 +12083,75 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_MONO_PAT | (br00 & (BLT_DST_TILED | 0x7<<12 | 0x7<<8)) | 3<<20;
- b[1] = br13;
- b[2] = (r->y + dy) << 16 | (r->x + dx);
- b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx);
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
- b[7] = pat[0];
- b[8] = pat[1];
- sna->kgem.nbatch += 9;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_MONO_PAT | (br00 & (BLT_DST_TILED | 0x7<<12 | 0x7<<8)) | 3<<20 | 8;
+ b[1] = br13;
+ b[2] = (r->y + dy) << 16 | (r->x + dx);
+ b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
+ b[8] = pat[0];
+ b[9] = pat[1];
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_MONO_PAT | (br00 & (BLT_DST_TILED | 0x7<<12 | 0x7<<8)) | 3<<20 | 7;
+ b[1] = br13;
+ b[2] = (r->y + dy) << 16 | (r->x + dx);
+ b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx);
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
+ b[7] = pat[0];
+ b[8] = pat[1];
+ sna->kgem.nbatch += 9;
+ }
} else do {
int n_this_time;
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20;
- b[1] = br13;
- b[2] = 0;
- b[3] = 0;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
- b[7] = pat[0];
- b[8] = pat[1];
- sna->kgem.nbatch += 9;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 8;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
+ b[8] = pat[0];
+ b[9] = pat[1];
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 7;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
+ b[7] = pat[0];
+ b[8] = pat[1];
+ sna->kgem.nbatch += 9;
+ }
n_this_time = n;
if (3*n_this_time > sna->kgem.surface - sna->kgem.nbatch - KGEM_BATCH_RESERVED)
@@ -11933,20 +12192,38 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20;
- b[1] = br13;
- b[2] = 0;
- b[3] = 0;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
- b[7] = pat[0];
- b[8] = pat[1];
- sna->kgem.nbatch += 9;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 8;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
+ b[8] = pat[0];
+ b[9] = pat[1];
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 7;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
+ b[7] = pat[0];
+ b[8] = pat[1];
+ sna->kgem.nbatch += 9;
+ }
if (clip.data == NULL) {
do {
@@ -11965,20 +12242,38 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20;
- b[1] = br13;
- b[2] = 0;
- b[3] = 0;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
- b[7] = pat[0];
- b[8] = pat[1];
- sna->kgem.nbatch += 9;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 8;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
+ b[8] = pat[0];
+ b[9] = pat[1];
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 7;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
+ b[7] = pat[0];
+ b[8] = pat[1];
+ sna->kgem.nbatch += 9;
+ }
}
assert(sna->kgem.mode == KGEM_BLT);
@@ -12019,20 +12314,38 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20;
- b[1] = br13;
- b[2] = 0;
- b[3] = 0;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
- b[7] = pat[0];
- b[8] = pat[1];
- sna->kgem.nbatch += 9;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 8;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
+ b[8] = pat[0];
+ b[9] = pat[1];
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 7;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
+ b[7] = pat[0];
+ b[8] = pat[1];
+ sna->kgem.nbatch += 9;
+ }
}
assert(sna->kgem.mode == KGEM_BLT);
@@ -12161,7 +12474,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
if (src_stride <= 128) {
src_stride = ALIGN(src_stride, 8) / 4;
assert(src_stride <= 32);
- if (!kgem_check_batch(&sna->kgem, 7+src_stride) ||
+ if (!kgem_check_batch(&sna->kgem, 8+src_stride) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
kgem_submit(&sna->kgem);
@@ -12172,23 +12485,40 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00;
- b[0] |= ((r->x - origin->x) & 7) << 17;
- b[1] = br13;
- b[2] = (r->y + dy) << 16 | (r->x + dx);
- b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx);
- b[4] = kgem_add_reloc(&sna->kgem,
- sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_MONO_SRC_COPY_IMM | (6 + src_stride) | br00;
+ b[0] |= ((r->x - origin->x) & 7) << 17;
+ b[1] = br13;
+ b[2] = (r->y + dy) << 16 | (r->x + dx);
+ b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
- sna->kgem.nbatch += 7 + src_stride;
+ dst = (uint8_t *)&b[8];
+ sna->kgem.nbatch += 8 + src_stride;
+ } else {
+ b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00;
+ b[0] |= ((r->x - origin->x) & 7) << 17;
+ b[1] = br13;
+ b[2] = (r->y + dy) << 16 | (r->x + dx);
+ b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx);
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
- dst = (uint8_t *)&b[7];
+ dst = (uint8_t *)&b[7];
+ sna->kgem.nbatch += 7 + src_stride;
+ }
src_stride = stipple->devKind;
src = stipple->devPrivate.ptr;
src += (r->y - origin->y) * src_stride + bx1/8;
@@ -12206,7 +12536,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
struct kgem_bo *upload;
void *ptr;
- if (!kgem_check_batch(&sna->kgem, 8) ||
+ if (!kgem_check_batch(&sna->kgem, 10) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
kgem_submit(&sna->kgem);
@@ -12240,28 +12570,49 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_MONO_SRC_COPY | br00;
- b[0] |= ((r->x - origin->x) & 7) << 17;
- b[1] = br13;
- b[2] = (r->y + dy) << 16 | (r->x + dx);
- b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx);
- b[4] = kgem_add_reloc(&sna->kgem,
- sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
- upload,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- b[6] = gc->bgPixel;
- b[7] = gc->fgPixel;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_MONO_SRC_COPY | br00 | 8;
+ b[0] |= ((r->x - origin->x) & 7) << 17;
+ b[1] = br13;
+ b[2] = (r->y + dy) << 16 | (r->x + dx);
+ b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ *(uint64_t *)(b+6) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[8] = gc->bgPixel;
+ b[9] = gc->fgPixel;
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_MONO_SRC_COPY | br00 | 6;
+ b[0] |= ((r->x - origin->x) & 7) << 17;
+ b[1] = br13;
+ b[2] = (r->y + dy) << 16 | (r->x + dx);
+ b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx);
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
- sna->kgem.nbatch += 8;
+ sna->kgem.nbatch += 8;
+ }
sigtrap_put();
}
+
kgem_bo_destroy(&sna->kgem, upload);
}
@@ -12313,7 +12664,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
if (src_stride <= 128) {
src_stride = ALIGN(src_stride, 8) / 4;
assert(src_stride <= 32);
- if (!kgem_check_batch(&sna->kgem, 7+src_stride) ||
+ if (!kgem_check_batch(&sna->kgem, 8+src_stride) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
kgem_submit(&sna->kgem);
@@ -12324,23 +12675,41 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00;
- b[0] |= ((box.x1 - pat.x) & 7) << 17;
- b[1] = br13;
- b[2] = (box.y1 + dy) << 16 | (box.x1 + dx);
- b[3] = (box.y2 + dy) << 16 | (box.x2 + dx);
- b[4] = kgem_add_reloc(&sna->kgem,
- sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_MONO_SRC_COPY_IMM | (6 + src_stride) | br00;
+ b[0] |= ((box.x1 - pat.x) & 7) << 17;
+ b[1] = br13;
+ b[2] = (box.y1 + dy) << 16 | (box.x1 + dx);
+ b[3] = (box.y2 + dy) << 16 | (box.x2 + dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
- sna->kgem.nbatch += 7 + src_stride;
+ dst = (uint8_t *)&b[8];
+ sna->kgem.nbatch += 8 + src_stride;
+ } else {
+ b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00;
+ b[0] |= ((box.x1 - pat.x) & 7) << 17;
+ b[1] = br13;
+ b[2] = (box.y1 + dy) << 16 | (box.x1 + dx);
+ b[3] = (box.y2 + dy) << 16 | (box.x2 + dx);
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
+
+ dst = (uint8_t *)&b[7];
+ sna->kgem.nbatch += 7 + src_stride;
+ }
- dst = (uint8_t *)&b[7];
src_stride = stipple->devKind;
src = stipple->devPrivate.ptr;
src += (box.y1 - pat.y) * src_stride + bx1/8;
@@ -12355,7 +12724,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
src += src_stride;
} while (--bh);
} else {
- if (!kgem_check_batch(&sna->kgem, 8) ||
+ if (!kgem_check_batch(&sna->kgem, 10) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
kgem_submit(&sna->kgem);
@@ -12389,26 +12758,46 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_MONO_SRC_COPY | br00;
- b[0] |= ((box.x1 - pat.x) & 7) << 17;
- b[1] = br13;
- b[2] = (box.y1 + dy) << 16 | (box.x1 + dx);
- b[3] = (box.y2 + dy) << 16 | (box.x2 + dx);
- b[4] = kgem_add_reloc(&sna->kgem,
- sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
- upload,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- b[6] = gc->bgPixel;
- b[7] = gc->fgPixel;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_MONO_SRC_COPY | br00 | 8;
+ b[0] |= ((box.x1 - pat.x) & 7) << 17;
+ b[1] = br13;
+ b[2] = (box.y1 + dy) << 16 | (box.x1 + dx);
+ b[3] = (box.y2 + dy) << 16 | (box.x2 + dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ *(uint64_t *)(b+6) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[8] = gc->bgPixel;
+ b[9] = gc->fgPixel;
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_MONO_SRC_COPY | br00 | 6;
+ b[0] |= ((box.x1 - pat.x) & 7) << 17;
+ b[1] = br13;
+ b[2] = (box.y1 + dy) << 16 | (box.x1 + dx);
+ b[3] = (box.y2 + dy) << 16 | (box.x2 + dx);
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
- sna->kgem.nbatch += 8;
+ sna->kgem.nbatch += 8;
+ }
sigtrap_put();
}
kgem_bo_destroy(&sna->kgem, upload);
@@ -12463,7 +12852,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
if (src_stride <= 128) {
src_stride = ALIGN(src_stride, 8) / 4;
assert(src_stride <= 32);
- if (!kgem_check_batch(&sna->kgem, 7+src_stride) ||
+ if (!kgem_check_batch(&sna->kgem, 8+src_stride) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
kgem_submit(&sna->kgem);
@@ -12474,23 +12863,40 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00;
- b[0] |= ((box.x1 - pat.x) & 7) << 17;
- b[1] = br13;
- b[2] = (box.y1 + dy) << 16 | (box.x1 + dx);
- b[3] = (box.y2 + dy) << 16 | (box.x2 + dx);
- b[4] = kgem_add_reloc(&sna->kgem,
- sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_MONO_SRC_COPY_IMM | (6 + src_stride) | br00;
+ b[0] |= ((box.x1 - pat.x) & 7) << 17;
+ b[1] = br13;
+ b[2] = (box.y1 + dy) << 16 | (box.x1 + dx);
+ b[3] = (box.y2 + dy) << 16 | (box.x2 + dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
- sna->kgem.nbatch += 7 + src_stride;
+ dst = (uint8_t *)&b[8];
+ sna->kgem.nbatch += 8 + src_stride;
+ } else {
+ b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00;
+ b[0] |= ((box.x1 - pat.x) & 7) << 17;
+ b[1] = br13;
+ b[2] = (box.y1 + dy) << 16 | (box.x1 + dx);
+ b[3] = (box.y2 + dy) << 16 | (box.x2 + dx);
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
- dst = (uint8_t *)&b[7];
+ dst = (uint8_t *)&b[7];
+ sna->kgem.nbatch += 7 + src_stride;
+ }
src_stride = stipple->devKind;
src = stipple->devPrivate.ptr;
src += (box.y1 - pat.y) * src_stride + bx1/8;
@@ -12505,7 +12911,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
src += src_stride;
} while (--bh);
} else {
- if (!kgem_check_batch(&sna->kgem, 8) ||
+ if (!kgem_check_batch(&sna->kgem, 10) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
kgem_submit(&sna->kgem);
@@ -12539,26 +12945,46 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_MONO_SRC_COPY | br00;
- b[0] |= ((box.x1 - pat.x) & 7) << 17;
- b[1] = br13;
- b[2] = (box.y1 + dy) << 16 | (box.x1 + dx);
- b[3] = (box.y2 + dy) << 16 | (box.x2 + dx);
- b[4] = kgem_add_reloc(&sna->kgem,
- sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
- upload,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- b[6] = gc->bgPixel;
- b[7] = gc->fgPixel;
-
- sna->kgem.nbatch += 8;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_MONO_SRC_COPY | br00 | 8;
+ b[0] |= ((box.x1 - pat.x) & 7) << 17;
+ b[1] = br13;
+ b[2] = (box.y1 + dy) << 16 | (box.x1 + dx);
+ b[3] = (box.y2 + dy) << 16 | (box.x2 + dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ *(uint64_t *)(b+6) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[8] = gc->bgPixel;
+ b[9] = gc->fgPixel;
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_MONO_SRC_COPY | br00 | 6;
+ b[0] |= ((box.x1 - pat.x) & 7) << 17;
+ b[1] = br13;
+ b[2] = (box.y1 + dy) << 16 | (box.x1 + dx);
+ b[3] = (box.y2 + dy) << 16 | (box.x2 + dx);
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
+
+ sna->kgem.nbatch += 8;
+ }
sigtrap_put();
}
kgem_bo_destroy(&sna->kgem, upload);
@@ -12622,7 +13048,7 @@ sna_poly_fill_rect_stippled_n_box__imm(struct sna *sna,
len = bw*bh;
len = ALIGN(len, 8) / 4;
assert(len <= 32);
- if (!kgem_check_batch(&sna->kgem, 7+len) ||
+ if (!kgem_check_batch(&sna->kgem, 8+len) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
kgem_submit(&sna->kgem);
@@ -12633,22 +13059,37 @@ sna_poly_fill_rect_stippled_n_box__imm(struct sna *sna,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = br00 | (5 + len) | (ox & 7) << 17;
- b[1] = br13;
- b[2] = y1 << 16 | x1;
- b[3] = y2 << 16 | x2;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
- bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
-
- sna->kgem.nbatch += 7 + len;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = br00 | (6 + len) | (ox & 7) << 17;
+ b[1] = br13;
+ b[2] = y1 << 16 | x1;
+ b[3] = y2 << 16 | x2;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
+ dst = (uint8_t *)&b[8];
+ sna->kgem.nbatch += 8 + len;
+ } else {
+ b[0] = br00 | (5 + len) | (ox & 7) << 17;
+ b[1] = br13;
+ b[2] = y1 << 16 | x1;
+ b[3] = y2 << 16 | x2;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
+ dst = (uint8_t *)&b[7];
+ sna->kgem.nbatch += 7 + len;
+ }
- dst = (uint8_t *)&b[7];
len = gc->stipple->devKind;
src = gc->stipple->devPrivate.ptr;
src += oy*len + ox/8;
@@ -12729,7 +13170,7 @@ sna_poly_fill_rect_stippled_n_box(struct sna *sna,
len = bw*bh;
len = ALIGN(len, 8) / 4;
- if (!kgem_check_batch(&sna->kgem, 7+len) ||
+ if (!kgem_check_batch(&sna->kgem, 8+len) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 2)) {
kgem_submit(&sna->kgem);
@@ -12744,25 +13185,45 @@ sna_poly_fill_rect_stippled_n_box(struct sna *sna,
if (!use_tile && len <= 32) {
uint8_t *dst, *src;
- b[0] = XY_MONO_SRC_COPY_IMM;
- b[0] |= (br00 & (BLT_DST_TILED | 3 << 20));
- b[0] |= (ox & 7) << 17;
- b[0] |= (5 + len);
- b[1] = br13;
- b[2] = y1 << 16 | x1;
- b[3] = y2 << 16 | x2;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
- bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = gc->bgPixel;
- b[6] = gc->fgPixel;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_MONO_SRC_COPY_IMM;
+ b[0] |= (br00 & (BLT_DST_TILED | 3 << 20));
+ b[0] |= (ox & 7) << 17;
+ b[0] |= (6 + len);
+ b[1] = br13;
+ b[2] = y1 << 16 | x1;
+ b[3] = y2 << 16 | x2;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
- sna->kgem.nbatch += 7 + len;
+ dst = (uint8_t *)&b[8];
+ sna->kgem.nbatch += 8 + len;
+ } else {
+ b[0] = XY_MONO_SRC_COPY_IMM;
+ b[0] |= (br00 & (BLT_DST_TILED | 3 << 20));
+ b[0] |= (ox & 7) << 17;
+ b[0] |= (5 + len);
+ b[1] = br13;
+ b[2] = y1 << 16 | x1;
+ b[3] = y2 << 16 | x2;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = gc->bgPixel;
+ b[6] = gc->fgPixel;
+
+ dst = (uint8_t *)&b[7];
+ sna->kgem.nbatch += 7 + len;
+ }
- dst = (uint8_t *)&b[7];
len = gc->stipple->devKind;
src = gc->stipple->devPrivate.ptr;
src += oy*len + ox/8;
@@ -12794,25 +13255,43 @@ sna_poly_fill_rect_stippled_n_box(struct sna *sna,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = br00 | (ox & 7) << 17;
- b[1] = br13;
- b[2] = y1 << 16 | x1;
- b[3] = y2 << 16 | x2;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
- bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
- upload,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- b[6] = gc->bgPixel;
- b[7] = gc->fgPixel;
-
- sna->kgem.nbatch += 8;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = br00 | (ox & 7) << 17 | 8;
+ b[1] = br13;
+ b[2] = y1 << 16 | x1;
+ b[3] = y2 << 16 | x2;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ *(uint64_t *)(b+6) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[8] = gc->bgPixel;
+ b[9] = gc->fgPixel;
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = br00 | (ox & 7) << 17 | 6;
+ b[1] = br13;
+ b[2] = y1 << 16 | x1;
+ b[3] = y2 << 16 | x2;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
+ sna->kgem.nbatch += 8;
+ }
if (!has_tile) {
dst = ptr;
@@ -13702,7 +14181,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
}
kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
- if (!kgem_check_batch(&sna->kgem, 16) ||
+ if (!kgem_check_batch(&sna->kgem, 20) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
kgem_submit(&sna->kgem);
@@ -13723,24 +14202,47 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_SETUP_BLT | 3 << 20;
- b[1] = bo->pitch;
- if (sna->kgem.gen >= 040 && bo->tiling) {
- b[0] |= BLT_DST_TILED;
- b[1] >>= 2;
- }
- b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16;
- b[2] = extents->y1 << 16 | extents->x1;
- b[3] = extents->y2 << 16 | extents->x2;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = bg;
- b[6] = fg;
- b[7] = 0;
- sna->kgem.nbatch += 8;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_SETUP_BLT | 3 << 20 | 8;
+ b[1] = bo->pitch;
+ if (sna->kgem.gen >= 040 && bo->tiling) {
+ b[0] |= BLT_DST_TILED;
+ b[1] >>= 2;
+ }
+ b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16;
+ b[2] = extents->y1 << 16 | extents->x1;
+ b[3] = extents->y2 << 16 | extents->x2;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = bg;
+ b[7] = fg;
+ b[8] = 0;
+ b[9] = 0;
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_BLT | 3 << 20 | 6;
+ b[1] = bo->pitch;
+ if (sna->kgem.gen >= 040 && bo->tiling) {
+ b[0] |= BLT_DST_TILED;
+ b[1] >>= 2;
+ }
+ b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16;
+ b[2] = extents->y1 << 16 | extents->x1;
+ b[3] = extents->y2 << 16 | extents->x2;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = bg;
+ b[6] = fg;
+ b[7] = 0;
+ sna->kgem.nbatch += 8;
+ }
br00 = XY_TEXT_IMMEDIATE_BLT;
if (bo->tiling && sna->kgem.gen >= 040)
@@ -13786,24 +14288,47 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_SETUP_BLT | 3 << 20;
- b[1] = bo->pitch;
- if (sna->kgem.gen >= 040 && bo->tiling) {
- b[0] |= BLT_DST_TILED;
- b[1] >>= 2;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_SETUP_BLT | 3 << 20 | 8;
+ b[1] = bo->pitch;
+ if (bo->tiling) {
+ b[0] |= BLT_DST_TILED;
+ b[1] >>= 2;
+ }
+ b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16;
+ b[2] = extents->y1 << 16 | extents->x1;
+ b[3] = extents->y2 << 16 | extents->x2;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = bg;
+ b[7] = fg;
+ b[8] = 0;
+ b[9] = 0;
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_BLT | 3 << 20 | 6;
+ b[1] = bo->pitch;
+ if (sna->kgem.gen >= 040 && bo->tiling) {
+ b[0] |= BLT_DST_TILED;
+ b[1] >>= 2;
+ }
+ b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16;
+ b[2] = extents->y1 << 16 | extents->x1;
+ b[3] = extents->y2 << 16 | extents->x2;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = bg;
+ b[6] = fg;
+ b[7] = 0;
+ sna->kgem.nbatch += 8;
}
- b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16;
- b[2] = extents->y1 << 16 | extents->x1;
- b[3] = extents->y2 << 16 | extents->x2;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = bg;
- b[6] = fg;
- b[7] = 0;
- sna->kgem.nbatch += 8;
}
assert(sna->kgem.mode == KGEM_BLT);
@@ -14370,7 +14895,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc,
}
kgem_set_mode(&sna->kgem, KGEM_BLT, bo);
- if (!kgem_check_batch(&sna->kgem, 16) ||
+ if (!kgem_check_batch(&sna->kgem, 20) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc(&sna->kgem, 1)) {
kgem_submit(&sna->kgem);
@@ -14391,24 +14916,47 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_SETUP_BLT | 1 << 20;
- b[1] = bo->pitch;
- if (sna->kgem.gen >= 040 && bo->tiling) {
- b[0] |= BLT_DST_TILED;
- b[1] >>= 2;
- }
- b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16;
- b[2] = extents->y1 << 16 | extents->x1;
- b[3] = extents->y2 << 16 | extents->x2;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = bg;
- b[6] = fg;
- b[7] = 0;
- sna->kgem.nbatch += 8;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_SETUP_BLT | 1 << 20 | 8;
+ b[1] = bo->pitch;
+ if (sna->kgem.gen >= 040 && bo->tiling) {
+ b[0] |= BLT_DST_TILED;
+ b[1] >>= 2;
+ }
+ b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16;
+ b[2] = extents->y1 << 16 | extents->x1;
+ b[3] = extents->y2 << 16 | extents->x2;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = bg;
+ b[7] = fg;
+ b[8] = 0;
+ b[9] = 0;
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_BLT | 1 << 20 | 6;
+ b[1] = bo->pitch;
+ if (sna->kgem.gen >= 040 && bo->tiling) {
+ b[0] |= BLT_DST_TILED;
+ b[1] >>= 2;
+ }
+ b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16;
+ b[2] = extents->y1 << 16 | extents->x1;
+ b[3] = extents->y2 << 16 | extents->x2;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = bg;
+ b[6] = fg;
+ b[7] = 0;
+ sna->kgem.nbatch += 8;
+ }
do {
CharInfoPtr *info = _info;
@@ -14476,25 +15024,47 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_SETUP_BLT | 1 << 20;
- b[1] = bo->pitch;
- if (sna->kgem.gen >= 040 && bo->tiling) {
- b[0] |= BLT_DST_TILED;
- b[1] >>= 2;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_SETUP_BLT | 1 << 20 | 8;
+ b[1] = bo->pitch;
+ if (bo->tiling) {
+ b[0] |= BLT_DST_TILED;
+ b[1] >>= 2;
+ }
+ b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16;
+ b[2] = extents->y1 << 16 | extents->x1;
+ b[3] = extents->y2 << 16 | extents->x2;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = bg;
+ b[7] = fg;
+ b[8] = 0;
+ b[9] = 0;
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_BLT | 1 << 20 | 6;
+ b[1] = bo->pitch;
+ if (sna->kgem.gen >= 040 && bo->tiling) {
+ b[0] |= BLT_DST_TILED;
+ b[1] >>= 2;
+ }
+ b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16;
+ b[2] = extents->y1 << 16 | extents->x1;
+ b[3] = extents->y2 << 16 | extents->x2;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = bg;
+ b[6] = fg;
+ b[7] = 0;
+ sna->kgem.nbatch += 8;
}
- b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16;
- b[2] = extents->y1 << 16 | extents->x1;
- b[3] = extents->y2 << 16 | extents->x2;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4,
- bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = bg;
- b[6] = fg;
- b[7] = 0;
- sna->kgem.nbatch += 8;
}
assert(sna->kgem.mode == KGEM_BLT);
@@ -14789,7 +15359,7 @@ sna_push_pixels_solid_blt(GCPtr gc,
struct kgem_bo *upload;
void *ptr;
- if (!kgem_check_batch(&sna->kgem, 8) ||
+ if (!kgem_check_batch(&sna->kgem, 10) ||
!kgem_check_bo_fenced(&sna->kgem, bo) ||
!kgem_check_reloc_and_exec(&sna->kgem, 2)) {
kgem_submit(&sna->kgem);
@@ -14827,34 +15397,63 @@ sna_push_pixels_solid_blt(GCPtr gc,
assert(sna->kgem.mode == KGEM_BLT);
b = sna->kgem.batch + sna->kgem.nbatch;
- b[0] = XY_MONO_SRC_COPY | 3 << 20;
- b[0] |= ((box->x1 - region->extents.x1) & 7) << 17;
- b[1] = bo->pitch;
- if (sna->kgem.gen >= 040 && bo->tiling) {
- b[0] |= BLT_DST_TILED;
- b[1] >>= 2;
- }
- b[1] |= 1 << 29;
- b[1] |= blt_depth(drawable->depth) << 24;
- b[1] |= rop << 16;
- b[2] = box->y1 << 16 | box->x1;
- b[3] = box->y2 << 16 | box->x2;
- b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5,
- upload,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- b[6] = gc->bgPixel;
- b[7] = gc->fgPixel;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_MONO_SRC_COPY | 3 << 20 | 8;
+ b[0] |= ((box->x1 - region->extents.x1) & 7) << 17;
+ b[1] = bo->pitch;
+ if (bo->tiling) {
+ b[0] |= BLT_DST_TILED;
+ b[1] >>= 2;
+ }
+ b[1] |= 1 << 29;
+ b[1] |= blt_depth(drawable->depth) << 24;
+ b[1] |= rop << 16;
+ b[2] = box->y1 << 16 | box->x1;
+ b[3] = box->y2 << 16 | box->x2;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ *(uint64_t *)(b+6) =
+ kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[8] = gc->bgPixel;
+ b[9] = gc->fgPixel;
+ sna->kgem.nbatch += 10;
+ } else {
+ b[0] = XY_MONO_SRC_COPY | 3 << 20 | 6;
+ b[0] |= ((box->x1 - region->extents.x1) & 7) << 17;
+ b[1] = bo->pitch;
+ if (sna->kgem.gen >= 040 && bo->tiling) {
+ b[0] |= BLT_DST_TILED;
+ b[1] >>= 2;
+ }
+ b[1] |= 1 << 29;
+ b[1] |= blt_depth(drawable->depth) << 24;
+ b[1] |= rop << 16;
+ b[2] = box->y1 << 16 | box->x1;
+ b[3] = box->y2 << 16 | box->x2;
+ b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = gc->bgPixel;
+ b[7] = gc->fgPixel;
- sna->kgem.nbatch += 8;
+ sna->kgem.nbatch += 8;
+ }
sigtrap_put();
}
+
kgem_bo_destroy(&sna->kgem, upload);
box++;
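kgem_add_reloc64() returns the 64-bit presumed address (presumed offset plus delta), which the emitters store into two consecutive batch dwords with *(uint64_t *)(b+4). A minimal standalone sketch of that packing, assuming a little-endian batch as the driver does; memcpy is used here only to sidestep the unaligned-store question that the driver's pointer cast relies on x86 to tolerate:

#include <stdint.h>
#include <string.h>
#include <stdio.h>

/* Write a 64-bit presumed GPU address into batch[pos] / batch[pos+1]
 * (low dword first on a little-endian CPU). */
static void put_reloc64(uint32_t *batch, unsigned pos, uint64_t address)
{
	memcpy(&batch[pos], &address, sizeof(address));
}

int main(void)
{
	uint32_t batch[10] = { 0 };
	uint64_t presumed = 0x0000000123456000ull;	/* illustrative offset */

	put_reloc64(batch, 4, presumed + 0x40);		/* delta folded in */
	printf("b[4]=%08x b[5]=%08x\n", batch[4], batch[5]);
	return 0;
}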
diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c
index e63b360c..f5abeffc 100644
--- a/src/sna/sna_blt.c
+++ b/src/sna/sna_blt.c
@@ -158,7 +158,7 @@ static bool sna_blt_fill_init(struct sna *sna,
blt->bpp = bpp;
kgem_set_mode(kgem, KGEM_BLT, bo);
- if (!kgem_check_batch(kgem, 12) ||
+ if (!kgem_check_batch(kgem, 14) ||
!kgem_check_bo_fenced(kgem, bo)) {
kgem_submit(kgem);
if (!kgem_check_bo_fenced(kgem, bo))
@@ -181,22 +181,42 @@ static bool sna_blt_fill_init(struct sna *sna,
assert(sna->kgem.mode == KGEM_BLT);
b = kgem->batch + kgem->nbatch;
- b[0] = XY_SETUP_MONO_PATTERN_SL_BLT;
- if (bpp == 32)
- b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
- b[1] = blt->br13;
- b[2] = 0;
- b[3] = 0;
- b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = pixel;
- b[6] = pixel;
- b[7] = 0;
- b[8] = 0;
- kgem->nbatch += 9;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
+ if (bpp == 32)
+ b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ b[1] = blt->br13;
+ b[2] = 0;
+ b[3] = 0;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = pixel;
+ b[7] = pixel;
+ b[8] = 0;
+ b[9] = 0;
+ kgem->nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
+ if (bpp == 32)
+ b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ b[1] = blt->br13;
+ b[2] = 0;
+ b[3] = 0;
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = pixel;
+ b[6] = pixel;
+ b[7] = 0;
+ b[8] = 0;
+ kgem->nbatch += 9;
+ }
assert(kgem->nbatch < kgem->surface);
sna->blt_state.fill_bo = bo->unique_id;
@@ -218,22 +238,42 @@ noinline static void sna_blt_fill_begin(struct sna *sna,
assert(kgem->nbatch == 0);
b = kgem->batch;
- b[0] = XY_SETUP_MONO_PATTERN_SL_BLT;
- if (blt->bpp == 32)
- b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
- b[1] = blt->br13;
- b[2] = 0;
- b[3] = 0;
- b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0],
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = blt->pixel;
- b[6] = blt->pixel;
- b[7] = 0;
- b[8] = 0;
- kgem->nbatch = 9;
+ if (sna->kgem.gen >= 0100) {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
+ if (blt->bpp == 32)
+ b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ b[1] = blt->br13;
+ b[2] = 0;
+ b[3] = 0;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = blt->pixel;
+ b[7] = blt->pixel;
+ b[8] = 0;
+ b[9] = 0;
+ kgem->nbatch = 10;
+ } else {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
+ if (blt->bpp == 32)
+ b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ b[1] = blt->br13;
+ b[2] = 0;
+ b[3] = 0;
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = blt->pixel;
+ b[6] = blt->pixel;
+ b[7] = 0;
+ b[8] = 0;
+ kgem->nbatch = 9;
+ }
}
inline static void sna_blt_fill_one(struct sna *sna,
@@ -279,7 +319,7 @@ static bool sna_blt_copy_init(struct sna *sna,
blt->bo[0] = src;
blt->bo[1] = dst;
- blt->cmd = XY_SRC_COPY_BLT_CMD;
+ blt->cmd = XY_SRC_COPY_BLT_CMD | (kgem->gen >= 0100 ? 8 : 6);
if (bpp == 32)
blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
@@ -332,7 +372,7 @@ static bool sna_blt_alpha_fixup_init(struct sna *sna,
blt->bo[0] = src;
blt->bo[1] = dst;
- blt->cmd = XY_FULL_MONO_PATTERN_BLT;
+ blt->cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ? 12 : 10);
blt->pitch[0] = src->pitch;
if (kgem->gen >= 040 && src->tiling) {
blt->cmd |= BLT_SRC_TILED;
@@ -391,7 +431,7 @@ static void sna_blt_alpha_fixup_one(struct sna *sna,
assert(width > 0);
assert(height > 0);
- if (!kgem_check_batch(kgem, 12) ||
+ if (!kgem_check_batch(kgem, 14) ||
!kgem_check_reloc(kgem, 2)) {
_kgem_submit(kgem);
_kgem_set_mode(kgem, KGEM_BLT);
@@ -403,24 +443,43 @@ static void sna_blt_alpha_fixup_one(struct sna *sna,
b[1] = blt->br13;
b[2] = (dst_y << 16) | dst_x;
b[3] = ((dst_y + height) << 16) | (dst_x + width);
- b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4,
- blt->bo[1],
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = blt->pitch[0];
- b[6] = (src_y << 16) | src_x;
- b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7,
- blt->bo[0],
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- b[8] = blt->pixel;
- b[9] = blt->pixel;
- b[10] = 0;
- b[11] = 0;
- kgem->nbatch += 12;
+ if (sna->kgem.gen >= 0100) {
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = blt->pitch[0];
+ b[7] = (src_y << 16) | src_x;
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[10] = blt->pixel;
+ b[11] = blt->pixel;
+ b[12] = 0;
+ b[13] = 0;
+ kgem->nbatch += 14;
+ } else {
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = blt->pitch[0];
+ b[6] = (src_y << 16) | src_x;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[8] = blt->pixel;
+ b[9] = blt->pixel;
+ b[10] = 0;
+ b[11] = 0;
+ kgem->nbatch += 12;
+ }
assert(kgem->nbatch < kgem->surface);
}
@@ -446,34 +505,61 @@ static void sna_blt_copy_one(struct sna *sna,
assert(height > 0);
/* Compare against a previous fill */
- if (kgem->nbatch >= 6 &&
- blt->overwrites &&
- kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB))) &&
- kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
- kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width)) &&
+ if (blt->overwrites &&
kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->target_handle) {
- DBG(("%s: replacing last fill\n", __FUNCTION__));
- if (kgem_check_batch(kgem, 8-6)) {
- assert(sna->kgem.mode == KGEM_BLT);
- b = kgem->batch + kgem->nbatch - 6;
- b[0] = blt->cmd;
- b[1] = blt->br13;
- b[5] = (src_y << 16) | src_x;
- b[6] = blt->pitch[0];
- b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6,
- blt->bo[0],
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- kgem->nbatch += 8 - 6;
- assert(kgem->nbatch < kgem->surface);
- return;
+ if (sna->kgem.gen >= 0100) {
+ if (kgem->nbatch >= 7 &&
+ kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (blt->cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
+ kgem->batch[kgem->nbatch-5] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
+ kgem->batch[kgem->nbatch-4] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) {
+ DBG(("%s: replacing last fill\n", __FUNCTION__));
+ if (kgem_check_batch(kgem, 3)) {
+ assert(kgem->mode == KGEM_BLT);
+ b = kgem->batch + kgem->nbatch - 7;
+ b[0] = blt->cmd;
+ b[1] = blt->br13;
+ b[6] = (src_y << 16) | src_x;
+ b[7] = blt->pitch[0];
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 8 - 7, blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 3;
+ assert(kgem->nbatch < kgem->surface);
+ return;
+ }
+ kgem->nbatch -= 7;
+ kgem->nreloc--;
+ }
+ } else {
+ if (kgem->nbatch >= 6 &&
+ kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
+ kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) &&
+ kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) {
+ DBG(("%s: replacing last fill\n", __FUNCTION__));
+ if (kgem_check_batch(kgem, 8-6)) {
+ assert(kgem->mode == KGEM_BLT);
+ b = kgem->batch + kgem->nbatch - 6;
+ b[0] = blt->cmd;
+ b[1] = blt->br13;
+ b[5] = (src_y << 16) | src_x;
+ b[6] = blt->pitch[0];
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6, blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8 - 6;
+ assert(kgem->nbatch < kgem->surface);
+ return;
+ }
+ kgem->nbatch -= 6;
+ kgem->nreloc--;
+ }
}
- kgem->nbatch -= 6;
- kgem->nreloc--;
}
- if (!kgem_check_batch(kgem, 8) ||
+ if (!kgem_check_batch(kgem, 10) ||
!kgem_check_reloc(kgem, 2)) {
_kgem_submit(kgem);
_kgem_set_mode(kgem, KGEM_BLT);
@@ -485,20 +571,35 @@ static void sna_blt_copy_one(struct sna *sna,
b[1] = blt->br13;
b[2] = (dst_y << 16) | dst_x;
b[3] = ((dst_y + height) << 16) | (dst_x + width);
- b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4,
- blt->bo[1],
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = (src_y << 16) | src_x;
- b[6] = blt->pitch[0];
- b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7,
- blt->bo[0],
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- kgem->nbatch += 8;
+ if (kgem->gen >= 0100) {
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = (src_y << 16) | src_x;
+ b[7] = blt->pitch[0];
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 10;
+ } else {
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = (src_y << 16) | src_x;
+ b[6] = blt->pitch[0];
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0],
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8;
+ }
assert(kgem->nbatch < kgem->surface);
}
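The in-place rewrite of a just-emitted solid fill into a copy depends on the fill length: 7 dwords on gen8+ (XY_COLOR_BLT | 5) versus 6 before, so the batch pointer rewinds by 7 or 6 dwords and then grows by 3 or 2 to reach the 10- or 8-dword XY_SRC_COPY layout. A standalone sketch of just that arithmetic; the helper and struct names are illustrative, not driver code:

#include <stdio.h>

struct blt_rewrite {
	unsigned fill_len;	/* dwords occupied by the previous XY_COLOR_BLT */
	unsigned copy_len;	/* dwords of the XY_SRC_COPY that replaces it */
	unsigned grow;		/* extra dwords appended to the batch */
};

static struct blt_rewrite fill_to_copy(unsigned gen)
{
	struct blt_rewrite r;
	r.fill_len = gen >= 0100 ? 7 : 6;
	r.copy_len = gen >= 0100 ? 10 : 8;
	r.grow = r.copy_len - r.fill_len;	/* 3 on gen8+, 2 before */
	return r;
}

int main(void)
{
	struct blt_rewrite r = fill_to_copy(0100);
	printf("gen8: rewind %u dwords, append %u\n", r.fill_len, r.grow);
	return 0;
}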
@@ -1472,6 +1573,140 @@ static void blt_composite_copy_boxes__thread(struct sna *sna,
sna_vertex_unlock(&sna->render);
}
+static void blt_composite_copy_boxes__thread64(struct sna *sna,
+ const struct sna_composite_op *op,
+ const BoxRec *box, int nbox)
+{
+ struct kgem *kgem = &sna->kgem;
+ int dst_dx = op->dst.x;
+ int dst_dy = op->dst.y;
+ int src_dx = op->src.offset[0];
+ int src_dy = op->src.offset[1];
+ uint32_t cmd = op->u.blt.cmd;
+ uint32_t br13 = op->u.blt.br13;
+ struct kgem_bo *src_bo = op->u.blt.bo[0];
+ struct kgem_bo *dst_bo = op->u.blt.bo[1];
+ int src_pitch = op->u.blt.pitch[0];
+
+ DBG(("%s: nbox=%d\n", __FUNCTION__, nbox));
+
+ sna_vertex_lock(&sna->render);
+
+ if ((dst_dx | dst_dy) == 0) {
+ uint64_t hdr = (uint64_t)br13 << 32 | cmd;
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ assert(kgem->mode == KGEM_BLT);
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" %s: box=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1));
+
+ assert(box->x1 + src_dx >= 0);
+ assert(box->y1 + src_dy >= 0);
+ assert(box->x1 + src_dx <= INT16_MAX);
+ assert(box->y1 + src_dy <= INT16_MAX);
+
+ assert(box->x1 >= 0);
+ assert(box->y1 >= 0);
+
+ *(uint64_t *)&b[0] = hdr;
+ *(uint64_t *)&b[2] = *(const uint64_t *)box;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = add2(b[2], src_dx, src_dy);
+ b[7] = src_pitch;
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 10;
+ assert(kgem->nbatch < kgem->surface);
+ box++;
+ } while (--nbox_this_time);
+
+ if (!nbox)
+ break;
+
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ } while (1);
+ } else {
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ assert(kgem->mode == KGEM_BLT);
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" %s: box=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1));
+
+ assert(box->x1 + src_dx >= 0);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
+ b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
+ b[7] = src_pitch;
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 10;
+ assert(kgem->nbatch < kgem->surface);
+ box++;
+ } while (--nbox_this_time);
+
+ if (!nbox)
+ break;
+
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ } while (1);
+ }
+ sna_vertex_unlock(&sna->render);
+}
+
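Each box in the gen8+ copy loop consumes 10 batch dwords and 2 relocation entries, so the per-iteration budget divides the remaining batch space by 10 and the remaining relocation slots by 2. A standalone sketch of that budgeting, with the reserve expressed as a plain parameter instead of the driver's KGEM_BATCH_RESERVED:

#include <stdio.h>

/* How many 10-dword, 2-reloc boxes still fit in this batch. */
static int boxes_that_fit(int nbox,
			  int batch_space_left,	/* dwords, reserve already subtracted */
			  int reloc_slots_left)
{
	if (10 * nbox > batch_space_left)
		nbox = batch_space_left / 10;
	if (2 * nbox > reloc_slots_left)
		nbox = reloc_slots_left / 2;
	return nbox;
}

int main(void)
{
	printf("%d boxes fit\n", boxes_that_fit(1000, 512, 64));
	return 0;
}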
fastcall static void
blt_composite_copy_with_alpha(struct sna *sna,
const struct sna_composite_op *op,
@@ -1594,7 +1829,10 @@ prepare_blt_copy(struct sna *sna,
op->blt = blt_composite_copy;
op->box = blt_composite_copy_box;
op->boxes = blt_composite_copy_boxes;
- op->thread_boxes = blt_composite_copy_boxes__thread;
+ if (sna->kgem.gen >= 0100)
+ op->thread_boxes = blt_composite_copy_boxes__thread64;
+ else
+ op->thread_boxes = blt_composite_copy_boxes__thread;
if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo,
src->drawable.bitsPerPixel,
@@ -2597,7 +2835,7 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
assert(box->x1 >= 0);
assert(box->y1 >= 0);
- cmd = XY_COLOR_BLT;
+ cmd = XY_COLOR_BLT | (kgem->gen >= 0100 ? 5 : 4);
br13 = bo->pitch;
if (kgem->gen >= 040 && bo->tiling) {
cmd |= BLT_DST_TILED;
@@ -2616,29 +2854,58 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
/* All too frequently one blt completely overwrites the previous */
overwrites = alu == GXcopy || alu == GXclear || alu == GXset;
- if (overwrites && kgem->nbatch >= 6 &&
- kgem->batch[kgem->nbatch-6] == cmd &&
- *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box &&
- kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
- DBG(("%s: replacing last fill\n", __FUNCTION__));
- kgem->batch[kgem->nbatch-5] = br13;
- kgem->batch[kgem->nbatch-1] = color;
- return true;
- }
- if (overwrites && kgem->nbatch >= 8 &&
- (kgem->batch[kgem->nbatch-8] & 0xffc0000f) == XY_SRC_COPY_BLT_CMD &&
- *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box &&
- kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
- DBG(("%s: replacing last copy\n", __FUNCTION__));
- kgem->batch[kgem->nbatch-8] = cmd;
- kgem->batch[kgem->nbatch-7] = br13;
- kgem->batch[kgem->nbatch-3] = color;
- /* Keep the src bo as part of the execlist, just remove
- * its relocation entry.
- */
- kgem->nreloc--;
- kgem->nbatch -= 2;
- return true;
+ if (overwrites) {
+ if (sna->kgem.gen >= 0100) {
+ if (kgem->nbatch >= 7 &&
+ kgem->batch[kgem->nbatch-7] == cmd &&
+ *(uint64_t *)&kgem->batch[kgem->nbatch-5] == *(const uint64_t *)box &&
+ kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
+ DBG(("%s: replacing last fill\n", __FUNCTION__));
+ kgem->batch[kgem->nbatch-6] = br13;
+ kgem->batch[kgem->nbatch-1] = color;
+ return true;
+ }
+ if (kgem->nbatch >= 10 &&
+ (kgem->batch[kgem->nbatch-10] & 0xffc0000f) == (XY_SRC_COPY_BLT_CMD | 8) &&
+ *(uint64_t *)&kgem->batch[kgem->nbatch-8] == *(const uint64_t *)box &&
+ kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
+ DBG(("%s: replacing last copy\n", __FUNCTION__));
+ kgem->batch[kgem->nbatch-10] = cmd;
+ kgem->batch[kgem->nbatch-9] = br13;
+ kgem->batch[kgem->nbatch-4] = color;
+ /* Keep the src bo as part of the execlist, just remove
+ * its relocation entry.
+ */
+ kgem->nreloc--;
+ kgem->nbatch -= 3;
+ return true;
+ }
+ } else {
+ if (kgem->nbatch >= 6 &&
+ kgem->batch[kgem->nbatch-6] == cmd &&
+ *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box &&
+ kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) {
+ DBG(("%s: replacing last fill\n", __FUNCTION__));
+ kgem->batch[kgem->nbatch-5] = br13;
+ kgem->batch[kgem->nbatch-1] = color;
+ return true;
+ }
+ if (kgem->nbatch >= 8 &&
+ (kgem->batch[kgem->nbatch-8] & 0xffc0000f) == (XY_SRC_COPY_BLT_CMD | 6) &&
+ *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box &&
+ kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) {
+ DBG(("%s: replacing last copy\n", __FUNCTION__));
+ kgem->batch[kgem->nbatch-8] = cmd;
+ kgem->batch[kgem->nbatch-7] = br13;
+ kgem->batch[kgem->nbatch-3] = color;
+ /* Keep the src bo as part of the execlist, just remove
+ * its relocation entry.
+ */
+ kgem->nreloc--;
+ kgem->nbatch -= 2;
+ return true;
+ }
+ }
}
/* If we are currently emitting SCANLINES, keep doing so */
@@ -2652,7 +2919,7 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
}
kgem_set_mode(kgem, KGEM_BLT, bo);
- if (!kgem_check_batch(kgem, 6) ||
+ if (!kgem_check_batch(kgem, 7) ||
!kgem_check_reloc(kgem, 1) ||
!kgem_check_bo_fenced(kgem, bo)) {
kgem_submit(kgem);
@@ -2670,13 +2937,24 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu,
b[0] = cmd;
b[1] = br13;
*(uint64_t *)(b+2) = *(const uint64_t *)box;
- b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = color;
- kgem->nbatch += 6;
+ if (kgem->gen >= 0100) {
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = color;
+ kgem->nbatch += 7;
+ } else {
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = color;
+ kgem->nbatch += 6;
+ }
assert(kgem->nbatch < kgem->surface);
sna->blt_state.fill_bo = bo->unique_id;
@@ -2734,7 +3012,7 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
}
kgem_set_mode(kgem, KGEM_BLT, bo);
- if (!kgem_check_batch(kgem, 12) ||
+ if (!kgem_check_batch(kgem, 14) ||
!kgem_check_bo_fenced(kgem, bo)) {
kgem_submit(kgem);
if (!kgem_check_bo_fenced(&sna->kgem, bo))
@@ -2757,22 +3035,42 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
assert(sna->kgem.mode == KGEM_BLT);
b = kgem->batch + kgem->nbatch;
- b[0] = XY_SETUP_MONO_PATTERN_SL_BLT;
- if (bpp == 32)
- b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
- b[1] = br13;
- b[2] = 0;
- b[3] = 0;
- b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = pixel;
- b[6] = pixel;
- b[7] = 0;
- b[8] = 0;
- kgem->nbatch += 9;
+ if (kgem->gen >= 0100) {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
+ if (bpp == 32)
+ b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = pixel;
+ b[7] = pixel;
+ b[8] = 0;
+ b[9] = 0;
+ kgem->nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
+ if (bpp == 32)
+ b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = pixel;
+ b[6] = pixel;
+ b[7] = 0;
+ b[8] = 0;
+ kgem->nbatch += 9;
+ }
assert(kgem->nbatch < kgem->surface);
sna->blt_state.fill_bo = bo->unique_id;
@@ -2819,22 +3117,42 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu,
assert(sna->kgem.mode == KGEM_BLT);
b = kgem->batch + kgem->nbatch;
- b[0] = XY_SETUP_MONO_PATTERN_SL_BLT;
- if (bpp == 32)
- b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
- b[1] = br13;
- b[2] = 0;
- b[3] = 0;
- b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = pixel;
- b[6] = pixel;
- b[7] = 0;
- b[8] = 0;
- kgem->nbatch += 9;
+ if (kgem->gen >= 0100) {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8;
+ if (bpp == 32)
+ b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = pixel;
+ b[7] = pixel;
+ b[8] = 0;
+ b[9] = 0;
+ kgem->nbatch += 10;
+ } else {
+ b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7;
+ if (bpp == 32)
+ b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB;
+ b[1] = br13;
+ b[2] = 0;
+ b[3] = 0;
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = pixel;
+ b[6] = pixel;
+ b[7] = 0;
+ b[8] = 0;
+ kgem->nbatch += 9;
+ }
assert(kgem->nbatch < kgem->surface);
}
} while (nbox);
@@ -2897,19 +3215,31 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
}
/* Compare first box against a previous fill */
- if (kgem->nbatch >= 6 &&
- (alu == GXcopy || alu == GXclear || alu == GXset) &&
- kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle &&
- kgem->batch[kgem->nbatch-6] == ((cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT) &&
- kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
- kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
- DBG(("%s: deleting last fill\n", __FUNCTION__));
- kgem->nbatch -= 6;
- kgem->nreloc--;
+ if ((alu == GXcopy || alu == GXclear || alu == GXset) &&
+ kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) {
+ if (kgem->gen >= 0100) {
+ if (kgem->nbatch >= 7 &&
+ kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) &&
+ kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
+ kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
+ DBG(("%s: deleting last fill\n", __FUNCTION__));
+ kgem->nbatch -= 7;
+ kgem->nreloc--;
+ }
+ } else {
+ if (kgem->nbatch >= 6 &&
+ kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) &&
+ kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) &&
+ kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) {
+ DBG(("%s: deleting last fill\n", __FUNCTION__));
+ kgem->nbatch -= 6;
+ kgem->nreloc--;
+ }
+ }
}
kgem_set_mode(kgem, KGEM_BLT, dst_bo);
- if (!kgem_check_batch(kgem, 8) ||
+ if (!kgem_check_batch(kgem, 10) ||
!kgem_check_reloc(kgem, 2) ||
!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
kgem_submit(kgem);
@@ -2922,112 +3252,229 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu,
}
if ((dst_dx | dst_dy) == 0) {
- uint64_t hdr = (uint64_t)br13 << 32 | cmd;
- do {
- int nbox_this_time;
-
- nbox_this_time = nbox;
- if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
- nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
- if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
- nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
- assert(nbox_this_time);
- nbox -= nbox_this_time;
-
- assert(sna->kgem.mode == KGEM_BLT);
+ if (kgem->gen >= 0100) {
+ uint64_t hdr = (uint64_t)br13 << 32 | cmd | 8;
do {
- uint32_t *b = kgem->batch + kgem->nbatch;
-
- DBG((" %s: box=(%d, %d)x(%d, %d)\n",
- __FUNCTION__,
- box->x1, box->y1,
- box->x2 - box->x1, box->y2 - box->y1));
-
- assert(box->x1 + src_dx >= 0);
- assert(box->y1 + src_dy >= 0);
- assert(box->x1 + src_dx <= INT16_MAX);
- assert(box->y1 + src_dy <= INT16_MAX);
-
- assert(box->x1 >= 0);
- assert(box->y1 >= 0);
-
- *(uint64_t *)&b[0] = hdr;
- *(uint64_t *)&b[2] = *(const uint64_t *)box;
- b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = add2(b[2], src_dx, src_dy);
- b[6] = src_pitch;
- b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- kgem->nbatch += 8;
- assert(kgem->nbatch < kgem->surface);
- box++;
- } while (--nbox_this_time);
-
- if (!nbox)
- break;
-
- _kgem_submit(kgem);
- _kgem_set_mode(kgem, KGEM_BLT);
- } while (1);
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ assert(sna->kgem.mode == KGEM_BLT);
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" %s: box=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1));
+
+ assert(box->x1 + src_dx >= 0);
+ assert(box->y1 + src_dy >= 0);
+ assert(box->x1 + src_dx <= INT16_MAX);
+ assert(box->y1 + src_dy <= INT16_MAX);
+
+ assert(box->x1 >= 0);
+ assert(box->y1 >= 0);
+
+ *(uint64_t *)&b[0] = hdr;
+ *(uint64_t *)&b[2] = *(const uint64_t *)box;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = add2(b[2], src_dx, src_dy);
+ b[7] = src_pitch;
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 10;
+ assert(kgem->nbatch < kgem->surface);
+ box++;
+ } while (--nbox_this_time);
+
+ if (!nbox)
+ break;
+
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ } while (1);
+ } else {
+ uint64_t hdr = (uint64_t)br13 << 32 | cmd | 6;
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ assert(sna->kgem.mode == KGEM_BLT);
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" %s: box=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1));
+
+ assert(box->x1 + src_dx >= 0);
+ assert(box->y1 + src_dy >= 0);
+ assert(box->x1 + src_dx <= INT16_MAX);
+ assert(box->y1 + src_dy <= INT16_MAX);
+
+ assert(box->x1 >= 0);
+ assert(box->y1 >= 0);
+
+ *(uint64_t *)&b[0] = hdr;
+ *(uint64_t *)&b[2] = *(const uint64_t *)box;
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = add2(b[2], src_dx, src_dy);
+ b[6] = src_pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8;
+ assert(kgem->nbatch < kgem->surface);
+ box++;
+ } while (--nbox_this_time);
+
+ if (!nbox)
+ break;
+
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ } while (1);
+ }
} else {
- do {
- int nbox_this_time;
-
- nbox_this_time = nbox;
- if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
- nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
- if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
- nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
- assert(nbox_this_time);
- nbox -= nbox_this_time;
-
- assert(sna->kgem.mode == KGEM_BLT);
+ if (kgem->gen >= 0100) {
+ cmd |= 8;
do {
- uint32_t *b = kgem->batch + kgem->nbatch;
-
- DBG((" %s: box=(%d, %d)x(%d, %d)\n",
- __FUNCTION__,
- box->x1, box->y1,
- box->x2 - box->x1, box->y2 - box->y1));
-
- assert(box->x1 + src_dx >= 0);
- assert(box->y1 + src_dy >= 0);
-
- assert(box->x1 + dst_dx >= 0);
- assert(box->y1 + dst_dy >= 0);
-
- b[0] = cmd;
- b[1] = br13;
- b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
- b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
- b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
- b[6] = src_pitch;
- b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- kgem->nbatch += 8;
- assert(kgem->nbatch < kgem->surface);
- box++;
- } while (--nbox_this_time);
-
- if (!nbox)
- break;
-
- _kgem_submit(kgem);
- _kgem_set_mode(kgem, KGEM_BLT);
- } while (1);
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ assert(sna->kgem.mode == KGEM_BLT);
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" %s: box=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1));
+
+ assert(box->x1 + src_dx >= 0);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
+ b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
+ b[7] = src_pitch;
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 10;
+ assert(kgem->nbatch < kgem->surface);
+ box++;
+ } while (--nbox_this_time);
+
+ if (!nbox)
+ break;
+
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ } while (1);
+ } else {
+ cmd |= 6;
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ assert(sna->kgem.mode == KGEM_BLT);
+ do {
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" %s: box=(%d, %d)x(%d, %d)\n",
+ __FUNCTION__,
+ box->x1, box->y1,
+ box->x2 - box->x1, box->y2 - box->y1));
+
+ assert(box->x1 + src_dx >= 0);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx);
+ b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx);
+ b[6] = src_pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8;
+ assert(kgem->nbatch < kgem->surface);
+ box++;
+ } while (--nbox_this_time);
+
+ if (!nbox)
+ break;
+
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ } while (1);
+ }
}
if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) {
diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c
index 5d238f48..b5e4f376 100644
--- a/src/sna/sna_io.c
+++ b/src/sna/sna_io.c
@@ -461,7 +461,7 @@ fallback:
}
kgem_set_mode(kgem, KGEM_BLT, dst_bo);
- if (!kgem_check_batch(kgem, 8) ||
+ if (!kgem_check_batch(kgem, 10) ||
!kgem_check_reloc_and_exec(kgem, 2) ||
!kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) {
kgem_submit(kgem);
@@ -473,59 +473,123 @@ fallback:
tmp_nbox = nbox;
tmp_box = box;
offset = 0;
- do {
- int nbox_this_time;
-
- nbox_this_time = tmp_nbox;
- if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
- nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
- if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
- nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
- assert(nbox_this_time);
- tmp_nbox -= nbox_this_time;
-
- for (n = 0; n < nbox_this_time; n++) {
- int height = tmp_box[n].y2 - tmp_box[n].y1;
- int width = tmp_box[n].x2 - tmp_box[n].x1;
- int pitch = PITCH(width, cpp);
- uint32_t *b = kgem->batch + kgem->nbatch;
-
- DBG((" blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n",
- offset, tmp_box[n].x1, tmp_box[n].y1,
- width, height, pitch));
-
- assert(tmp_box[n].x1 >= 0);
- assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch);
- assert(tmp_box[n].y1 >= 0);
- assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo));
-
- b[0] = cmd;
- b[1] = br13 | pitch;
- b[2] = 0;
- b[3] = height << 16 | width;
- b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- offset);
- b[5] = tmp_box[n].y1 << 16 | tmp_box[n].x1;
- b[6] = src_pitch;
- b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- 0);
- kgem->nbatch += 8;
-
- offset += pitch * height;
- }
+ if (sna->kgem.gen >= 0100) {
+ cmd |= 8;
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = tmp_nbox;
+ if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
+ assert(nbox_this_time);
+ tmp_nbox -= nbox_this_time;
+
+ assert(kgem->mode == KGEM_BLT);
+ for (n = 0; n < nbox_this_time; n++) {
+ int height = tmp_box[n].y2 - tmp_box[n].y1;
+ int width = tmp_box[n].x2 - tmp_box[n].x1;
+ int pitch = PITCH(width, cpp);
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n",
+ offset,
+ tmp_box[n].x1, tmp_box[n].y1,
+ width, height, pitch));
+
+ assert(tmp_box[n].x1 >= 0);
+ assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch);
+ assert(tmp_box[n].y1 >= 0);
+ assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo));
+
+ b[0] = cmd;
+ b[1] = br13 | pitch;
+ b[2] = 0;
+ b[3] = height << 16 | width;
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ offset);
+ b[6] = tmp_box[n].y1 << 16 | tmp_box[n].x1;
+ b[7] = src_pitch;
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 10;
+
+ offset += pitch * height;
+ }
- _kgem_submit(kgem);
- if (!tmp_nbox)
- break;
+ _kgem_submit(kgem);
+ if (!tmp_nbox)
+ break;
- _kgem_set_mode(kgem, KGEM_BLT);
- tmp_box += nbox_this_time;
- } while (1);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ tmp_box += nbox_this_time;
+ } while (1);
+ } else {
+ cmd |= 6;
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = tmp_nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
+ assert(nbox_this_time);
+ tmp_nbox -= nbox_this_time;
+
+ assert(kgem->mode == KGEM_BLT);
+ for (n = 0; n < nbox_this_time; n++) {
+ int height = tmp_box[n].y2 - tmp_box[n].y1;
+ int width = tmp_box[n].x2 - tmp_box[n].x1;
+ int pitch = PITCH(width, cpp);
+ uint32_t *b = kgem->batch + kgem->nbatch;
+
+ DBG((" blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n",
+ offset,
+ tmp_box[n].x1, tmp_box[n].y1,
+ width, height, pitch));
+
+ assert(tmp_box[n].x1 >= 0);
+ assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch);
+ assert(tmp_box[n].y1 >= 0);
+ assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo));
+
+ b[0] = cmd;
+ b[1] = br13 | pitch;
+ b[2] = 0;
+ b[3] = height << 16 | width;
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ offset);
+ b[5] = tmp_box[n].y1 << 16 | tmp_box[n].x1;
+ b[6] = src_pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ 0);
+ kgem->nbatch += 8;
+
+ offset += pitch * height;
+ }
+
+ _kgem_submit(kgem);
+ if (!tmp_nbox)
+ break;
+
+ _kgem_set_mode(kgem, KGEM_BLT);
+ tmp_box += nbox_this_time;
+ } while (1);
+ }
assert(offset == __kgem_buffer_size(dst_bo));
kgem_buffer_read_sync(kgem, dst_bo);
@@ -924,7 +988,7 @@ tile:
}
kgem_set_mode(kgem, KGEM_BLT, dst_bo);
- if (!kgem_check_batch(kgem, 8) ||
+ if (!kgem_check_batch(kgem, 10) ||
!kgem_check_reloc_and_exec(kgem, 2) ||
!kgem_check_bo_fenced(kgem, dst_bo)) {
kgem_submit(kgem);
@@ -933,91 +997,185 @@ tile:
_kgem_set_mode(kgem, KGEM_BLT);
}
- do {
- int nbox_this_time;
-
- nbox_this_time = nbox;
- if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
- nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
- if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
- nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
- assert(nbox_this_time);
- nbox -= nbox_this_time;
-
- /* Count the total number of bytes to be read and allocate a
- * single buffer large enough. Or if it is very small, combine
- * with other allocations. */
- offset = 0;
- for (n = 0; n < nbox_this_time; n++) {
- int height = box[n].y2 - box[n].y1;
- int width = box[n].x2 - box[n].x1;
- offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
- }
+ if (kgem->gen >= 0100) {
+ cmd |= 8;
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ /* Count the total number of bytes to be read and allocate a
+ * single buffer large enough. Or if it is very small, combine
+ * with other allocations. */
+ offset = 0;
+ for (n = 0; n < nbox_this_time; n++) {
+ int height = box[n].y2 - box[n].y1;
+ int width = box[n].x2 - box[n].x1;
+ offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
+ }
- src_bo = kgem_create_buffer(kgem, offset,
- KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
- &ptr);
- if (!src_bo)
- break;
+ src_bo = kgem_create_buffer(kgem, offset,
+ KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
+ &ptr);
+ if (!src_bo)
+ break;
+
+ offset = 0;
+ do {
+ int height = box->y2 - box->y1;
+ int width = box->x2 - box->x1;
+ int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
+ uint32_t *b;
- offset = 0;
+ DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
+ __FUNCTION__,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ width, height,
+ offset, pitch));
+
+ assert(box->x1 + src_dx >= 0);
+ assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ memcpy_blt(src, (char *)ptr + offset,
+ dst->drawable.bitsPerPixel,
+ stride, pitch,
+ box->x1 + src_dx, box->y1 + src_dy,
+ 0, 0,
+ width, height);
+
+ assert(kgem->mode == KGEM_BLT);
+ b = kgem->batch + kgem->nbatch;
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
+ b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = 0;
+ b[7] = pitch;
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ offset);
+ kgem->nbatch += 10;
+
+ box++;
+ offset += pitch * height;
+ } while (--nbox_this_time);
+ assert(offset == __kgem_buffer_size(src_bo));
+
+ if (nbox) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ kgem_bo_destroy(kgem, src_bo);
+ } while (nbox);
+ } else {
+ cmd |= 6;
do {
- int height = box->y2 - box->y1;
- int width = box->x2 - box->x1;
- int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
- uint32_t *b;
-
- DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
- __FUNCTION__,
- box->x1 + src_dx, box->y1 + src_dy,
- box->x1 + dst_dx, box->y1 + dst_dy,
- width, height,
- offset, pitch));
-
- assert(box->x1 + src_dx >= 0);
- assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
- assert(box->y1 + src_dy >= 0);
-
- assert(box->x1 + dst_dx >= 0);
- assert(box->y1 + dst_dy >= 0);
-
- memcpy_blt(src, (char *)ptr + offset,
- dst->drawable.bitsPerPixel,
- stride, pitch,
- box->x1 + src_dx, box->y1 + src_dy,
- 0, 0,
- width, height);
-
- b = kgem->batch + kgem->nbatch;
- b[0] = cmd;
- b[1] = br13;
- b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
- b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
- b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = 0;
- b[6] = pitch;
- b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- offset);
- kgem->nbatch += 8;
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ /* Count the total number of bytes to be read and allocate a
+ * single buffer large enough. Or if it is very small, combine
+ * with other allocations. */
+ offset = 0;
+ for (n = 0; n < nbox_this_time; n++) {
+ int height = box[n].y2 - box[n].y1;
+ int width = box[n].x2 - box[n].x1;
+ offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
+ }
- box++;
- offset += pitch * height;
- } while (--nbox_this_time);
- assert(offset == __kgem_buffer_size(src_bo));
+ src_bo = kgem_create_buffer(kgem, offset,
+ KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
+ &ptr);
+ if (!src_bo)
+ break;
- if (nbox) {
- _kgem_submit(kgem);
- _kgem_set_mode(kgem, KGEM_BLT);
- }
+ offset = 0;
+ do {
+ int height = box->y2 - box->y1;
+ int width = box->x2 - box->x1;
+ int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
+ uint32_t *b;
+
+ DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
+ __FUNCTION__,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ width, height,
+ offset, pitch));
+
+ assert(box->x1 + src_dx >= 0);
+ assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ memcpy_blt(src, (char *)ptr + offset,
+ dst->drawable.bitsPerPixel,
+ stride, pitch,
+ box->x1 + src_dx, box->y1 + src_dy,
+ 0, 0,
+ width, height);
+
+ assert(kgem->mode == KGEM_BLT);
+ b = kgem->batch + kgem->nbatch;
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
+ b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = 0;
+ b[6] = pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ offset);
+ kgem->nbatch += 8;
+
+ box++;
+ offset += pitch * height;
+ } while (--nbox_this_time);
+ assert(offset == __kgem_buffer_size(src_bo));
+
+ if (nbox) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
- kgem_bo_destroy(kgem, src_bo);
- } while (nbox);
+ kgem_bo_destroy(kgem, src_bo);
+ } while (nbox);
+ }
sna->blt_state.fill_bo = 0;
return true;
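The upload path stages every box into one linear buffer: each box gets PITCH(width, cpp) * height bytes, and the running byte offset doubles as the relocation delta for that box's source blit. A standalone sketch of the sizing pass, with a hypothetical pitch_for() standing in for the driver's PITCH() macro:

#include <stdint.h>
#include <stdio.h>

struct box { int x1, y1, x2, y2; };

/* Hypothetical stand-in for PITCH(): bytes per row, padded to 4. */
static int pitch_for(int width, int cpp)
{
	return (width * cpp + 3) & ~3;
}

static uint32_t staging_size(const struct box *boxes, int nbox, int cpp)
{
	uint32_t offset = 0;
	int n;

	for (n = 0; n < nbox; n++)
		offset += pitch_for(boxes[n].x2 - boxes[n].x1, cpp) *
			  (boxes[n].y2 - boxes[n].y1);
	return offset;	/* also the final reloc delta plus size check */
}

int main(void)
{
	struct box boxes[2] = { { 0, 0, 100, 10 }, { 10, 10, 30, 20 } };
	printf("staging buffer: %u bytes\n", staging_size(boxes, 2, 4));
	return 0;
}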
@@ -1315,7 +1473,7 @@ tile:
}
kgem_set_mode(kgem, KGEM_BLT, dst_bo);
- if (!kgem_check_batch(kgem, 8) ||
+ if (!kgem_check_batch(kgem, 10) ||
!kgem_check_reloc_and_exec(kgem, 2) ||
!kgem_check_bo_fenced(kgem, dst_bo)) {
kgem_submit(kgem);
@@ -1324,92 +1482,187 @@ tile:
_kgem_set_mode(kgem, KGEM_BLT);
}
- do {
- int nbox_this_time;
-
- nbox_this_time = nbox;
- if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
- nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
- if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
- nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
- assert(nbox_this_time);
- nbox -= nbox_this_time;
-
- /* Count the total number of bytes to be read and allocate a
- * single buffer large enough. Or if it is very small, combine
- * with other allocations. */
- offset = 0;
- for (n = 0; n < nbox_this_time; n++) {
- int height = box[n].y2 - box[n].y1;
- int width = box[n].x2 - box[n].x1;
- offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
- }
+ if (sna->kgem.gen >= 0100) {
+ cmd |= 8;
+ do {
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ /* Count the total number of bytes to be read and allocate a
+ * single buffer large enough. Or if it is very small, combine
+ * with other allocations. */
+ offset = 0;
+ for (n = 0; n < nbox_this_time; n++) {
+ int height = box[n].y2 - box[n].y1;
+ int width = box[n].x2 - box[n].x1;
+ offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
+ }
+
+ src_bo = kgem_create_buffer(kgem, offset,
+ KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
+ &ptr);
+ if (!src_bo)
+ break;
- src_bo = kgem_create_buffer(kgem, offset,
- KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
- &ptr);
- if (!src_bo)
- break;
+ offset = 0;
+ do {
+ int height = box->y2 - box->y1;
+ int width = box->x2 - box->x1;
+ int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
+ uint32_t *b;
- offset = 0;
+ DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
+ __FUNCTION__,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ width, height,
+ offset, pitch));
+
+ assert(box->x1 + src_dx >= 0);
+ assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ memcpy_xor(src, (char *)ptr + offset,
+ dst->drawable.bitsPerPixel,
+ stride, pitch,
+ box->x1 + src_dx, box->y1 + src_dy,
+ 0, 0,
+ width, height,
+ and, or);
+
+ assert(kgem->mode == KGEM_BLT);
+ b = kgem->batch + kgem->nbatch;
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
+ b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
+ *(uint64_t *)(b+4) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[6] = 0;
+ b[7] = pitch;
+ *(uint64_t *)(b+8) =
+ kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ offset);
+ kgem->nbatch += 10;
+
+ box++;
+ offset += pitch * height;
+ } while (--nbox_this_time);
+ assert(offset == __kgem_buffer_size(src_bo));
+
+ if (nbox) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ kgem_bo_destroy(kgem, src_bo);
+ } while (nbox);
+ } else {
+ cmd |= 6;
do {
- int height = box->y2 - box->y1;
- int width = box->x2 - box->x1;
- int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
- uint32_t *b;
-
- DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
- __FUNCTION__,
- box->x1 + src_dx, box->y1 + src_dy,
- box->x1 + dst_dx, box->y1 + dst_dy,
- width, height,
- offset, pitch));
-
- assert(box->x1 + src_dx >= 0);
- assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
- assert(box->y1 + src_dy >= 0);
-
- assert(box->x1 + dst_dx >= 0);
- assert(box->y1 + dst_dy >= 0);
-
- memcpy_xor(src, (char *)ptr + offset,
- dst->drawable.bitsPerPixel,
- stride, pitch,
- box->x1 + src_dx, box->y1 + src_dy,
- 0, 0,
- width, height,
- and, or);
-
- b = kgem->batch + kgem->nbatch;
- b[0] = cmd;
- b[1] = br13;
- b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
- b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
- b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- I915_GEM_DOMAIN_RENDER |
- KGEM_RELOC_FENCED,
- 0);
- b[5] = 0;
- b[6] = pitch;
- b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
- I915_GEM_DOMAIN_RENDER << 16 |
- KGEM_RELOC_FENCED,
- offset);
- kgem->nbatch += 8;
+ int nbox_this_time;
+
+ nbox_this_time = nbox;
+ if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED)
+ nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8;
+ if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc)
+ nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2;
+ assert(nbox_this_time);
+ nbox -= nbox_this_time;
+
+ /* Count the total number of bytes to be read and allocate a
+ * single buffer large enough. Or if it is very small, combine
+ * with other allocations. */
+ offset = 0;
+ for (n = 0; n < nbox_this_time; n++) {
+ int height = box[n].y2 - box[n].y1;
+ int width = box[n].x2 - box[n].x1;
+ offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height;
+ }
- box++;
- offset += pitch * height;
- } while (--nbox_this_time);
- assert(offset == __kgem_buffer_size(src_bo));
+ src_bo = kgem_create_buffer(kgem, offset,
+ KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0),
+ &ptr);
+ if (!src_bo)
+ break;
- if (nbox) {
- _kgem_submit(kgem);
- _kgem_set_mode(kgem, KGEM_BLT);
- }
+ offset = 0;
+ do {
+ int height = box->y2 - box->y1;
+ int width = box->x2 - box->x1;
+ int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3);
+ uint32_t *b;
+
+ DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n",
+ __FUNCTION__,
+ box->x1 + src_dx, box->y1 + src_dy,
+ box->x1 + dst_dx, box->y1 + dst_dy,
+ width, height,
+ offset, pitch));
+
+ assert(box->x1 + src_dx >= 0);
+ assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride);
+ assert(box->y1 + src_dy >= 0);
+
+ assert(box->x1 + dst_dx >= 0);
+ assert(box->y1 + dst_dy >= 0);
+
+ memcpy_xor(src, (char *)ptr + offset,
+ dst->drawable.bitsPerPixel,
+ stride, pitch,
+ box->x1 + src_dx, box->y1 + src_dy,
+ 0, 0,
+ width, height,
+ and, or);
- kgem_bo_destroy(kgem, src_bo);
- } while (nbox);
+ assert(kgem->mode == KGEM_BLT);
+ b = kgem->batch + kgem->nbatch;
+ b[0] = cmd;
+ b[1] = br13;
+ b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx);
+ b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx);
+ b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ I915_GEM_DOMAIN_RENDER |
+ KGEM_RELOC_FENCED,
+ 0);
+ b[5] = 0;
+ b[6] = pitch;
+ b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo,
+ I915_GEM_DOMAIN_RENDER << 16 |
+ KGEM_RELOC_FENCED,
+ offset);
+ kgem->nbatch += 8;
+
+ box++;
+ offset += pitch * height;
+ } while (--nbox_this_time);
+ assert(offset == __kgem_buffer_size(src_bo));
+
+ if (nbox) {
+ _kgem_submit(kgem);
+ _kgem_set_mode(kgem, KGEM_BLT);
+ }
+
+ kgem_bo_destroy(kgem, src_bo);
+ } while (nbox);
+ }
sna->blt_state.fill_bo = 0;
return true;
diff --git a/src/sna/sna_reg.h b/src/sna/sna_reg.h
index 26282361..bda6ef67 100644
--- a/src/sna/sna_reg.h
+++ b/src/sna/sna_reg.h
@@ -42,22 +42,22 @@
#define BLT_SRC_TILED (1<<15)
#define BLT_DST_TILED (1<<11)
-#define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3))
-#define XY_COLOR_BLT ((2<<29)|(0x50<<22)|(0x4))
-#define XY_SETUP_BLT ((2<<29)|(1<<22)|6)
-#define XY_SETUP_MONO_PATTERN_SL_BLT ((2<<29)|(0x11<<22)|7)
-#define XY_SETUP_CLIP ((2<<29)|(3<<22)|1)
-#define XY_SCANLINE_BLT ((2<<29)|(0x25<<22)|1)
-#define XY_TEXT_IMMEDIATE_BLT ((2<<29)|(0x31<<22)|(1<<16))
-#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6)
-#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|0x4)
-#define XY_PAT_BLT ((2<<29)|(0x51<<22)|0x4)
-#define XY_PAT_BLT_IMMEDIATE ((2<<29)|(0x72<<22))
-#define XY_MONO_PAT ((0x2<<29)|(0x52<<22)|0x7)
-#define XY_MONO_SRC_COPY ((0x2<<29)|(0x54<<22)|(0x6))
-#define XY_MONO_SRC_COPY_IMM ((0x2<<29)|(0x71<<22))
-#define XY_FULL_MONO_PATTERN_BLT ((0x2<<29)|(0x57<<22)|0xa)
-#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT ((0x2<<29)|(0x58<<22)|0xa)
+#define COLOR_BLT_CMD (2<<29|0x40<<22|(0x3))
+#define XY_COLOR_BLT (2<<29|0x50<<22|(0x4))
+#define XY_SETUP_BLT (2<<29|0x01<<22)
+#define XY_SETUP_MONO_PATTERN_SL_BLT (2<<29|0x11<<22)
+#define XY_SETUP_CLIP (2<<29|0x03<<22|1)
+#define XY_SCANLINE_BLT (2<<29|0x25<<22|1)
+#define XY_TEXT_IMMEDIATE_BLT (2<<29|0x31<<22|(1<<16))
+#define XY_SRC_COPY_BLT_CMD (2<<29|0x53<<22)
+#define SRC_COPY_BLT_CMD (2<<29|0x43<<22|0x4)
+#define XY_PAT_BLT (2<<29|0x51<<22)
+#define XY_PAT_BLT_IMMEDIATE (2<<29|0x72<<22)
+#define XY_MONO_PAT (2<<29|0x52<<22)
+#define XY_MONO_SRC_COPY (2<<29|0x54<<22)
+#define XY_MONO_SRC_COPY_IMM (2<<29|0x71<<22)
+#define XY_FULL_MONO_PATTERN_BLT (2<<29|0x57<<22)
+#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT (2<<29|0x58<<22)
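With the length bits dropped from these defines, every emitter now ORs in a gen-dependent length at the point of use (for example 6 versus 8 for XY_SRC_COPY_BLT_CMD, 4 versus 5 for XY_COLOR_BLT). A standalone sketch of composing one such header dword, repeating the define values from above; with_len() itself is illustrative only:

#include <stdint.h>
#include <stdio.h>

/* Same values as the defines above. */
#define XY_COLOR_BLT		(2<<29|0x50<<22|(0x4))
#define XY_SRC_COPY_BLT_CMD	(2<<29|0x53<<22)

/* The per-gen dword length now gets OR'ed in at the call site. */
static uint32_t with_len(uint32_t cmd, unsigned gen, unsigned len8, unsigned len6)
{
	return cmd | (gen >= 0100 ? len8 : len6);
}

int main(void)
{
	printf("XY_COLOR_BLT  gen7 %08x  gen8 %08x\n",
	       with_len(XY_COLOR_BLT, 070, 5, 4),
	       with_len(XY_COLOR_BLT, 0100, 5, 4));
	printf("XY_SRC_COPY   gen7 %08x  gen8 %08x\n",
	       with_len(XY_SRC_COPY_BLT_CMD, 070, 8, 6),
	       with_len(XY_SRC_COPY_BLT_CMD, 0100, 8, 6));
	return 0;
}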
/* FLUSH commands */
#define BRW_3D(Pipeline,Opcode,Subopcode) \
diff --git a/src/uxa/intel_driver.c b/src/uxa/intel_driver.c
index 690ac778..431c34ba 100644
--- a/src/uxa/intel_driver.c
+++ b/src/uxa/intel_driver.c
@@ -410,6 +410,9 @@ static Bool can_accelerate_blt(struct intel_screen_private *intel)
if (INTEL_INFO(intel)->gen == -1)
return FALSE;
+ if (INTEL_INFO(intel)->gen >= 0100)
+ return FALSE;
+
if (xf86ReturnOptValBool(intel->Options, OPTION_ACCEL_DISABLE, FALSE) ||
!intel_option_cast_string_to_bool(intel, OPTION_ACCEL_METHOD, TRUE)) {
xf86DrvMsg(intel->scrn->scrnIndex, X_CONFIG,