-rw-r--r-- | src/sna/kgem.c         |  128
-rw-r--r-- | src/sna/kgem.h         |    7
-rw-r--r-- | src/sna/sna_accel.c    | 1651
-rw-r--r-- | src/sna/sna_blt.c      |  989
-rw-r--r-- | src/sna/sna_io.c       |  679
-rw-r--r-- | src/sna/sna_reg.h      |   32
-rw-r--r-- | src/uxa/intel_driver.c |    3
7 files changed, 2445 insertions(+), 1044 deletions(-)
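This commit teaches the SNA blitter paths about 64-bit relocations: kgem_bo.presumed_offset widens to uint64_t, a new kgem_add_reloc64() helper is added alongside kgem_add_reloc(), and every XY_* BLT emitter gains a gen >= 0100 branch (SNA encodes the hardware generation in octal, so 0100 corresponds to Gen8/Broadwell, whose blitter takes full GPU addresses). Because a relocated address now occupies two consecutive dwords in the batch, each affected command grows by two dwords: the DWord-length byte in dword 0 is bumped (e.g. "| 6" becomes "| 8" for what is now a 10-dword command), nbatch advances by 10 instead of 8, and the kgem_check_batch() reservations are enlarged to match. The fragment below is a minimal, self-contained sketch of that two-dword address store only; it is not driver code, and demo_reloc64(), the fake presumed offset and the opcode-free dword 0 are stand-ins for the driver's kgem_add_reloc64() and the real BLT opcodes.

#include <inttypes.h>
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* Stand-in for kgem_add_reloc64(): the real helper also records a
 * drm_i915_gem_relocation_entry for the kernel; here we only return
 * the guessed GPU address that gets written into the batch. */
static uint64_t demo_reloc64(uint32_t *batch, int pos,
			     uint64_t presumed, uint64_t delta)
{
	(void)batch; (void)pos;
	return presumed + delta;
}

int main(void)
{
	uint32_t b[16] = { 0 };
	uint64_t addr;

	/* dword 0: opcode bits omitted; the low byte carries the DWord
	 * length, which the patch raises from 6 to 8 when the command
	 * grows from 8 to 10 dwords. */
	b[0] = 8;
	b[1] = 0;                 /* BR13: pitch / rop / depth */
	b[2] = 0;                 /* destination top-left */
	b[3] = 16 << 16 | 16;     /* destination bottom-right */

	/* Two dwords for the destination address.  The patch writes it as
	 * *(uint64_t *)(b+4) = kgem_add_reloc64(...); memcpy is the
	 * alignment/alias-safe equivalent.  On a little-endian CPU the
	 * low half lands in dword 4, the high half in dword 5. */
	addr = demo_reloc64(b, 4, 0x123456000ull, 0);
	memcpy(&b[4], &addr, sizeof(addr));

	/* Remaining operands shift down by one dword versus the 32-bit
	 * layout, e.g. the fill colour moves from b[5] to b[6]. */
	b[6] = 0xffffffff;

	printf("dword4=%08" PRIx32 " dword5=%08" PRIx32 "\n", b[4], b[5]);
	return 0;
}

Beyond the wider store, the relocation bookkeeping in kgem_add_reloc64() below is essentially the same as in kgem_add_reloc(): the new helper asserts gen >= 0100 (and the old one now asserts gen < 0100), and it returns the 64-bit delta plus presumed_offset so the caller can patch both batch dwords in one assignment.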
diff --git a/src/sna/kgem.c b/src/sna/kgem.c index 2710557d..4d225d1f 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -4982,6 +4982,7 @@ uint32_t kgem_add_reloc(struct kgem *kgem, DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n", __FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain)); + assert(kgem->gen < 0100); assert((read_write_domain & 0x7fff) == 0 || bo != NULL); index = kgem->nreloc++; @@ -5053,6 +5054,77 @@ uint32_t kgem_add_reloc(struct kgem *kgem, return delta; } +uint64_t kgem_add_reloc64(struct kgem *kgem, + uint32_t pos, + struct kgem_bo *bo, + uint32_t read_write_domain, + uint64_t delta) +{ + int index; + + DBG(("%s: handle=%d, pos=%d, delta=%d, domains=%08x\n", + __FUNCTION__, bo ? bo->handle : 0, pos, delta, read_write_domain)); + + assert(kgem->gen >= 0100); + assert((read_write_domain & 0x7fff) == 0 || bo != NULL); + + index = kgem->nreloc++; + assert(index < ARRAY_SIZE(kgem->reloc)); + kgem->reloc[index].offset = pos * sizeof(kgem->batch[0]); + if (bo) { + assert(kgem->mode != KGEM_NONE); + assert(bo->refcnt); + while (bo->proxy) { + DBG(("%s: adding proxy [delta=%d] for handle=%d\n", + __FUNCTION__, bo->delta, bo->handle)); + delta += bo->delta; + assert(bo->handle == bo->proxy->handle); + /* need to release the cache upon batch submit */ + if (bo->exec == NULL) { + list_move_tail(&bo->request, + &kgem->next_request->buffers); + bo->rq = MAKE_REQUEST(kgem->next_request, + kgem->ring); + bo->exec = &_kgem_dummy_exec; + bo->domain = DOMAIN_GPU; + } + + if (read_write_domain & 0x7fff && !bo->gpu_dirty) + __kgem_bo_mark_dirty(bo); + + bo = bo->proxy; + assert(bo->refcnt); + } + assert(bo->refcnt); + + if (bo->exec == NULL) + kgem_add_bo(kgem, bo); + assert(bo->rq == MAKE_REQUEST(kgem->next_request, kgem->ring)); + assert(RQ_RING(bo->rq) == kgem->ring); + + kgem->reloc[index].delta = delta; + kgem->reloc[index].target_handle = bo->target_handle; + kgem->reloc[index].presumed_offset = bo->presumed_offset; + + if (read_write_domain & 0x7fff && !bo->gpu_dirty) { + assert(!bo->snoop || kgem->can_blt_cpu); + __kgem_bo_mark_dirty(bo); + } + + delta += bo->presumed_offset; + } else { + kgem->reloc[index].delta = delta; + kgem->reloc[index].target_handle = ~0U; + kgem->reloc[index].presumed_offset = 0; + if (kgem->nreloc__self < 256) + kgem->reloc__self[kgem->nreloc__self++] = index; + } + kgem->reloc[index].read_domains = read_write_domain >> 16; + kgem->reloc[index].write_domain = read_write_domain & 0x7fff; + + return delta; +} + static void kgem_trim_vma_cache(struct kgem *kgem, int type, int bucket) { int i, j; @@ -6338,6 +6410,7 @@ kgem_replace_bo(struct kgem *kgem, * we only attempt to allocate a linear bo. 
*/ assert(src->tiling == I915_TILING_NONE); + assert(kgem_bo_can_blt(kgem, src)); size = height * pitch; size = NUM_PAGES(size); @@ -6363,7 +6436,7 @@ kgem_replace_bo(struct kgem *kgem, dst->refcnt = 1; kgem_set_mode(kgem, KGEM_BLT, dst); - if (!kgem_check_batch(kgem, 8) || + if (!kgem_check_batch(kgem, 10) || !kgem_check_reloc(kgem, 2) || !kgem_check_many_bo_fenced(kgem, src, dst, NULL)) { kgem_submit(kgem); @@ -6392,22 +6465,43 @@ kgem_replace_bo(struct kgem *kgem, } b = kgem->batch + kgem->nbatch; - b[0] = br00; - b[1] = br13; - b[2] = 0; - b[3] = height << 16 | width; - b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = 0; - b[6] = pitch; - b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - kgem->nbatch += 8; + if (kgem->gen >= 0100) { + b[0] = br00 | 8; + b[1] = br13; + b[2] = 0; + b[3] = height << 16 | width; + *(uint64_t *)(b+4) = + kgem_add_reloc64(kgem, kgem->nbatch + 4, dst, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = 0; + b[7] = pitch; + *(uint64_t *)(b+8) = + kgem_add_reloc64(kgem, kgem->nbatch + 8, src, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 10; + } else { + b[0] = br00 | 6; + b[1] = br13; + b[2] = 0; + b[3] = height << 16 | width; + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = 0; + b[6] = pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8; + } return dst; } diff --git a/src/sna/kgem.h b/src/sna/kgem.h index 7da359e0..b6d91561 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -65,11 +65,11 @@ struct kgem_bo { uint16_t offset; } binding; + uint64_t presumed_offset; uint32_t unique_id; uint32_t refcnt; uint32_t handle; uint32_t target_handle; - uint32_t presumed_offset; uint32_t delta; union { struct { @@ -446,6 +446,11 @@ uint32_t kgem_add_reloc(struct kgem *kgem, struct kgem_bo *bo, uint32_t read_write_domains, uint32_t delta); +uint64_t kgem_add_reloc64(struct kgem *kgem, + uint32_t pos, + struct kgem_bo *bo, + uint32_t read_write_domains, + uint64_t delta); void *kgem_bo_map(struct kgem *kgem, struct kgem_bo *bo); void *kgem_bo_map__async(struct kgem *kgem, struct kgem_bo *bo); diff --git a/src/sna/sna_accel.c b/src/sna/sna_accel.c index ffb61170..02caf4b9 100644 --- a/src/sna/sna_accel.c +++ b/src/sna/sna_accel.c @@ -4509,7 +4509,7 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, struct kgem_bo *upload; void *ptr; - if (!kgem_check_batch(&sna->kgem, 8) || + if (!kgem_check_batch(&sna->kgem, 10) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { kgem_submit(&sna->kgem); @@ -4548,32 +4548,61 @@ sna_put_xybitmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, } while (--bh); assert(sna->kgem.mode == KGEM_BLT); - b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_MONO_SRC_COPY | 3 << 20; - b[0] |= ((box->x1 - x) & 7) << 17; - b[1] = bo->pitch; - if (sna->kgem.gen >= 040 && bo->tiling) { - b[0] |= BLT_DST_TILED; - b[1] >>= 2; - } - b[1] |= blt_depth(drawable->depth) << 24; - b[1] |= rop << 16; - b[2] = box->y1 << 16 | box->x1; - b[3] = box->y2 << 16 | box->x2; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - 
I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, - upload, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - b[6] = gc->bgPixel; - b[7] = gc->fgPixel; + if (sna->kgem.gen >= 0100) { + b = sna->kgem.batch + sna->kgem.nbatch; + b[0] = XY_MONO_SRC_COPY | 3 << 20 | 8; + b[0] |= ((box->x1 - x) & 7) << 17; + b[1] = bo->pitch; + if (bo->tiling) { + b[0] |= BLT_DST_TILED; + b[1] >>= 2; + } + b[1] |= blt_depth(drawable->depth) << 24; + b[1] |= rop << 16; + b[2] = box->y1 << 16 | box->x1; + b[3] = box->y2 << 16 | box->x2; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + *(uint64_t *)(b+6) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[8] = gc->bgPixel; + b[9] = gc->fgPixel; + + sna->kgem.nbatch += 10; + } else { + b = sna->kgem.batch + sna->kgem.nbatch; + b[0] = XY_MONO_SRC_COPY | 3 << 20 | 6; + b[0] |= ((box->x1 - x) & 7) << 17; + b[1] = bo->pitch; + if (sna->kgem.gen >= 040 && bo->tiling) { + b[0] |= BLT_DST_TILED; + b[1] >>= 2; + } + b[1] |= blt_depth(drawable->depth) << 24; + b[1] |= rop << 16; + b[2] = box->y1 << 16 | box->x1; + b[3] = box->y2 << 16 | box->x2; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; - sna->kgem.nbatch += 8; + sna->kgem.nbatch += 8; + } sigtrap_put(); } kgem_bo_destroy(&sna->kgem, upload); @@ -4646,7 +4675,7 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, struct kgem_bo *upload; void *ptr; - if (!kgem_check_batch(&sna->kgem, 12) || + if (!kgem_check_batch(&sna->kgem, 14) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { kgem_submit(&sna->kgem); @@ -4683,38 +4712,70 @@ sna_put_xypixmap_blt(DrawablePtr drawable, GCPtr gc, RegionPtr region, } while (--bh); assert(sna->kgem.mode == KGEM_BLT); - b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_FULL_MONO_PATTERN_MONO_SRC_BLT | 3 << 20; - b[0] |= ((box->x1 - x) & 7) << 17; - b[1] = bo->pitch; - if (sna->kgem.gen >= 040 && bo->tiling) { - b[0] |= BLT_DST_TILED; - b[1] >>= 2; + if (sna->kgem.gen >= 0100) { + assert(sna->kgem.mode == KGEM_BLT); + b = sna->kgem.batch + sna->kgem.nbatch; + b[0] = XY_FULL_MONO_PATTERN_MONO_SRC_BLT | 3 << 20 | 12; + b[0] |= ((box->x1 - x) & 7) << 17; + b[1] = bo->pitch; + if (bo->tiling) { + b[0] |= BLT_DST_TILED; + b[1] >>= 2; + } + b[1] |= 1 << 31; /* solid pattern */ + b[1] |= blt_depth(drawable->depth) << 24; + b[1] |= 0xce << 16; /* S or (D and !P) */ + b[2] = box->y1 << 16 | box->x1; + b[3] = box->y2 << 16 | box->x2; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + *(uint64_t *)(b+6) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[8] = 0; + b[9] = i; + b[10] = i; + b[11] = i; + b[12] = -1; + b[13] = -1; + sna->kgem.nbatch += 14; + } else { + b = sna->kgem.batch + sna->kgem.nbatch; + b[0] = XY_FULL_MONO_PATTERN_MONO_SRC_BLT | 3 << 20 | 10; + b[0] |= ((box->x1 - x) & 7) << 17; + b[1] = bo->pitch; + if 
(sna->kgem.gen >= 040 && bo->tiling) { + b[0] |= BLT_DST_TILED; + b[1] >>= 2; + } + b[1] |= 1 << 31; /* solid pattern */ + b[1] |= blt_depth(drawable->depth) << 24; + b[1] |= 0xce << 16; /* S or (D and !P) */ + b[2] = box->y1 << 16 | box->x1; + b[3] = box->y2 << 16 | box->x2; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[6] = 0; + b[7] = i; + b[8] = i; + b[9] = i; + b[10] = -1; + b[11] = -1; + sna->kgem.nbatch += 12; } - b[1] |= 1 << 31; /* solid pattern */ - b[1] |= blt_depth(drawable->depth) << 24; - b[1] |= 0xce << 16; /* S or (D and !P) */ - b[2] = box->y1 << 16 | box->x1; - b[3] = box->y2 << 16 | box->x2; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, - bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, - upload, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - b[6] = 0; - b[7] = i; - b[8] = i; - b[9] = i; - b[10] = -1; - b[11] = -1; - - sna->kgem.nbatch += 12; sigtrap_put(); } kgem_bo_destroy(&sna->kgem, upload); @@ -7367,7 +7428,7 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, if (src_stride <= 128) { src_stride = ALIGN(src_stride, 8) / 4; assert(src_stride <= 32); - if (!kgem_check_batch(&sna->kgem, 7+src_stride) || + if (!kgem_check_batch(&sna->kgem, 8+src_stride) || !kgem_check_bo_fenced(&sna->kgem, arg->bo) || !kgem_check_reloc(&sna->kgem, 1)) { kgem_submit(&sna->kgem); @@ -7377,24 +7438,43 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, } assert(sna->kgem.mode == KGEM_BLT); - b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00; - b[0] |= ((box->x1 + sx) & 7) << 17; - b[1] = br13; - b[2] = (box->y1 + dy) << 16 | (box->x1 + dx); - b[3] = (box->y2 + dy) << 16 | (box->x2 + dx); - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, - arg->bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; + if (sna->kgem.gen >= 0100) { + b = sna->kgem.batch + sna->kgem.nbatch; + b[0] = XY_MONO_SRC_COPY_IMM | (6 + src_stride) | br00; + b[0] |= ((box->x1 + sx) & 7) << 17; + b[1] = br13; + b[2] = (box->y1 + dy) << 16 | (box->x1 + dx); + b[3] = (box->y2 + dy) << 16 | (box->x2 + dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, arg->bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; - sna->kgem.nbatch += 7 + src_stride; + dst = (uint8_t *)&b[8]; + sna->kgem.nbatch += 8 + src_stride; + } else { + b = sna->kgem.batch + sna->kgem.nbatch; + b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00; + b[0] |= ((box->x1 + sx) & 7) << 17; + b[1] = br13; + b[2] = (box->y1 + dy) << 16 | (box->x1 + dx); + b[3] = (box->y2 + dy) << 16 | (box->x2 + dx); + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, arg->bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; + + dst = (uint8_t *)&b[7]; + sna->kgem.nbatch += 7 + src_stride; + } - dst = (uint8_t *)&b[7]; src_stride = bitmap->devKind; src = bitmap->devPrivate.ptr; src += (box->y1 + sy) * src_stride + bx1/8; @@ -7414,7 +7494,7 @@ 
sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, struct kgem_bo *upload; void *ptr; - if (!kgem_check_batch(&sna->kgem, 8) || + if (!kgem_check_batch(&sna->kgem, 10) || !kgem_check_bo_fenced(&sna->kgem, arg->bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { kgem_submit(&sna->kgem); @@ -7433,27 +7513,47 @@ sna_copy_bitmap_blt(DrawablePtr _bitmap, DrawablePtr drawable, GCPtr gc, if (sigtrap_get() == 0) { assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; + if (sna->kgem.gen >= 0100) { + b[0] = XY_MONO_SRC_COPY | br00 | 8; + b[0] |= ((box->x1 + sx) & 7) << 17; + b[1] = br13; + b[2] = (box->y1 + dy) << 16 | (box->x1 + dx); + b[3] = (box->y2 + dy) << 16 | (box->x2 + dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, arg->bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + *(uint64_t *)(b+6) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[8] = gc->bgPixel; + b[9] = gc->fgPixel; + + sna->kgem.nbatch += 10; + } else { + b[0] = XY_MONO_SRC_COPY | br00 | 6; + b[0] |= ((box->x1 + sx) & 7) << 17; + b[1] = br13; + b[2] = (box->y1 + dy) << 16 | (box->x1 + dx); + b[3] = (box->y2 + dy) << 16 | (box->x2 + dx); + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, arg->bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; - b[0] = XY_MONO_SRC_COPY | br00; - b[0] |= ((box->x1 + sx) & 7) << 17; - b[1] = br13; - b[2] = (box->y1 + dy) << 16 | (box->x1 + dx); - b[3] = (box->y2 + dy) << 16 | (box->x2 + dx); - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, - arg->bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, - upload, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - b[6] = gc->bgPixel; - b[7] = gc->fgPixel; - - sna->kgem.nbatch += 8; + sna->kgem.nbatch += 8; + } dst = ptr; src_stride = bitmap->devKind; @@ -7542,7 +7642,7 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc, box->x2, box->y2, sx, sy, bx1, bx2)); - if (!kgem_check_batch(&sna->kgem, 8) || + if (!kgem_check_batch(&sna->kgem, 10) || !kgem_check_bo_fenced(&sna->kgem, arg->bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { kgem_submit(&sna->kgem); @@ -7667,25 +7767,45 @@ sna_copy_plane_blt(DrawablePtr source, DrawablePtr drawable, GCPtr gc, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = br00 | ((box->x1 + sx) & 7) << 17; - b[1] = br13; - b[2] = (box->y1 + dy) << 16 | (box->x1 + dx); - b[3] = (box->y2 + dy) << 16 | (box->x2 + dx); - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, - arg->bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, - upload, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - b[6] = gc->bgPixel; - b[7] = gc->fgPixel; + if (sna->kgem.gen >= 0100) { + b[0] = br00 | ((box->x1 + sx) & 7) << 17 | 8; + b[1] = br13; + b[2] = (box->y1 + dy) << 16 | (box->x1 + dx); + b[3] = (box->y2 + dy) << 16 | (box->x2 + dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, arg->bo, + I915_GEM_DOMAIN_RENDER << 16 | + 
I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + *(uint64_t *)(b+6) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[8] = gc->bgPixel; + b[9] = gc->fgPixel; + + sna->kgem.nbatch += 10; + } else { + b[0] = br00 | ((box->x1 + sx) & 7) << 17 | 6; + b[1] = br13; + b[2] = (box->y1 + dy) << 16 | (box->x1 + dx); + b[3] = (box->y2 + dy) << 16 | (box->x2 + dx); + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, arg->bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; - sna->kgem.nbatch += 8; + sna->kgem.nbatch += 8; + } sigtrap_put(); } kgem_bo_destroy(&sna->kgem, upload); @@ -11174,7 +11294,7 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, __FUNCTION__, n, r->x, r->y, r->width, r->height, clipped)); kgem_set_mode(&sna->kgem, KGEM_BLT, bo); - if (!kgem_check_batch(&sna->kgem, 8+2*3) || + if (!kgem_check_batch(&sna->kgem, 10+2*3) || !kgem_check_reloc(&sna->kgem, 2) || !kgem_check_bo_fenced(&sna->kgem, bo)) { kgem_submit(&sna->kgem); @@ -11218,41 +11338,81 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_PAT_BLT | tx << 12 | ty << 8 | 3 << 20 | (br00 & BLT_DST_TILED); - b[1] = br13; - b[2] = (r->y + dy) << 16 | (r->x + dx); - b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx); - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, tile_bo, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - sna->kgem.nbatch += 6; + if (sna->kgem.gen >= 0100) { + b[0] = XY_PAT_BLT | tx << 12 | ty << 8 | 3 << 20 | (br00 & BLT_DST_TILED) | 6; + b[1] = br13; + b[2] = (r->y + dy) << 16 | (r->x + dx); + b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + *(uint64_t *)(b+6) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, tile_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + sna->kgem.nbatch += 8; + } else { + b[0] = XY_PAT_BLT | tx << 12 | ty << 8 | 3 << 20 | (br00 & BLT_DST_TILED) | 4; + b[1] = br13; + b[2] = (r->y + dy) << 16 | (r->x + dx); + b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx); + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, tile_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + sna->kgem.nbatch += 6; + } } else do { int n_this_time; assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_SETUP_BLT | 3 << 20; - b[1] = br13; - b[2] = 0; - b[3] = 0; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; - b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - sna->kgem.nbatch += 8; + if (sna->kgem.gen >= 0100) { + b[0] = 
XY_SETUP_BLT | 3 << 20 | 8; + b[1] = br13; + b[2] = 0; + b[3] = 0; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; + *(uint64_t *)(b+8) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 8, tile_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + sna->kgem.nbatch += 10; + } else { + b[0] = XY_SETUP_BLT | 3 << 20 | 6; + b[1] = br13; + b[2] = 0; + b[3] = 0; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; + b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + sna->kgem.nbatch += 8; + } n_this_time = n; if (3*n_this_time > sna->kgem.surface - sna->kgem.nbatch - KGEM_BATCH_RESERVED) @@ -11301,22 +11461,43 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_SETUP_BLT | 3 << 20; - b[1] = br13; - b[2] = 0; - b[3] = 0; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; - b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - sna->kgem.nbatch += 8; + if (sna->kgem.gen >= 0100) { + b[0] = XY_SETUP_BLT | 3 << 20 | 8; + b[1] = br13; + b[2] = 0; + b[3] = 0; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; + *(uint64_t *)(b+8) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 8, tile_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + sna->kgem.nbatch += 10; + } else { + b[0] = XY_SETUP_BLT | 3 << 20 | 6; + b[1] = br13; + b[2] = 0; + b[3] = 0; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; + b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + sna->kgem.nbatch += 8; + } if (clip.data == NULL) { const BoxRec *c = &clip.extents; @@ -11339,22 +11520,43 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_SETUP_BLT | 3 << 20; - b[1] = br13; - b[2] = 0; - b[3] = 0; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; - b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - sna->kgem.nbatch += 8; + if (sna->kgem.gen >= 0100) { + b[0] = XY_SETUP_BLT | 3 << 20 | 8; + b[1] = br13; + b[2] = 0; + b[3] = 0; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; + *(uint64_t *)(b+8) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 8, tile_bo, + I915_GEM_DOMAIN_RENDER << 16 | + 
KGEM_RELOC_FENCED, + 0); + sna->kgem.nbatch += 10; + } else { + b[0] = XY_SETUP_BLT | 3 << 20 | 6; + b[1] = br13; + b[2] = 0; + b[3] = 0; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; + b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + sna->kgem.nbatch += 8; + } } assert(box.x1 + dx >= 0); @@ -11412,22 +11614,43 @@ sna_poly_fill_rect_tiled_8x8_blt(DrawablePtr drawable, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_SETUP_BLT | 3 << 20; - b[1] = br13; - b[2] = 0; - b[3] = 0; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; - b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - sna->kgem.nbatch += 8; + if (sna->kgem.gen >= 0100) { + b[0] = XY_SETUP_BLT | 3 << 20 | 8; + b[1] = br13; + b[2] = 0; + b[3] = 0; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; + *(uint64_t *)(b+8) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 8, tile_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + sna->kgem.nbatch += 10; + } else { + b[0] = XY_SETUP_BLT | 3 << 20 | 6; + b[1] = br13; + b[2] = 0; + b[3] = 0; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; + b[7] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 7, tile_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + sna->kgem.nbatch += 8; + } } assert(bb.x1 + dx >= 0); @@ -11840,7 +12063,7 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, } kgem_set_mode(&sna->kgem, KGEM_BLT, bo); - if (!kgem_check_batch(&sna->kgem, 9 + 2*3) || + if (!kgem_check_batch(&sna->kgem, 10 + 2*3) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 1)) { kgem_submit(&sna->kgem); @@ -11860,39 +12083,75 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_MONO_PAT | (br00 & (BLT_DST_TILED | 0x7<<12 | 0x7<<8)) | 3<<20; - b[1] = br13; - b[2] = (r->y + dy) << 16 | (r->x + dx); - b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx); - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; - b[7] = pat[0]; - b[8] = pat[1]; - sna->kgem.nbatch += 9; + if (sna->kgem.gen >= 0100) { + b[0] = XY_MONO_PAT | (br00 & (BLT_DST_TILED | 0x7<<12 | 0x7<<8)) | 3<<20 | 8; + b[1] = br13; + b[2] = (r->y + dy) << 16 | (r->x + dx); + b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; + b[8] = pat[0]; + b[9] = pat[1]; + sna->kgem.nbatch += 10; + } else { + b[0] = XY_MONO_PAT | (br00 & (BLT_DST_TILED | 0x7<<12 | 
0x7<<8)) | 3<<20 | 7; + b[1] = br13; + b[2] = (r->y + dy) << 16 | (r->x + dx); + b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx); + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; + b[7] = pat[0]; + b[8] = pat[1]; + sna->kgem.nbatch += 9; + } } else do { int n_this_time; assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20; - b[1] = br13; - b[2] = 0; - b[3] = 0; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; - b[7] = pat[0]; - b[8] = pat[1]; - sna->kgem.nbatch += 9; + if (sna->kgem.gen >= 0100) { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 8; + b[1] = br13; + b[2] = 0; + b[3] = 0; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; + b[8] = pat[0]; + b[9] = pat[1]; + sna->kgem.nbatch += 10; + } else { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 7; + b[1] = br13; + b[2] = 0; + b[3] = 0; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; + b[7] = pat[0]; + b[8] = pat[1]; + sna->kgem.nbatch += 9; + } n_this_time = n; if (3*n_this_time > sna->kgem.surface - sna->kgem.nbatch - KGEM_BATCH_RESERVED) @@ -11933,20 +12192,38 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20; - b[1] = br13; - b[2] = 0; - b[3] = 0; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; - b[7] = pat[0]; - b[8] = pat[1]; - sna->kgem.nbatch += 9; + if (sna->kgem.gen >= 0100) { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 8; + b[1] = br13; + b[2] = 0; + b[3] = 0; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; + b[8] = pat[0]; + b[9] = pat[1]; + sna->kgem.nbatch += 10; + } else { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 7; + b[1] = br13; + b[2] = 0; + b[3] = 0; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; + b[7] = pat[0]; + b[8] = pat[1]; + sna->kgem.nbatch += 9; + } if (clip.data == NULL) { do { @@ -11965,20 +12242,38 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20; - b[1] = br13; - b[2] = 0; - b[3] = 0; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; - b[7] = pat[0]; - b[8] = pat[1]; - sna->kgem.nbatch += 9; + if (sna->kgem.gen >= 0100) { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 
20 | 8; + b[1] = br13; + b[2] = 0; + b[3] = 0; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; + b[8] = pat[0]; + b[9] = pat[1]; + sna->kgem.nbatch += 10; + } else { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 7; + b[1] = br13; + b[2] = 0; + b[3] = 0; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; + b[7] = pat[0]; + b[8] = pat[1]; + sna->kgem.nbatch += 9; + } } assert(sna->kgem.mode == KGEM_BLT); @@ -12019,20 +12314,38 @@ sna_poly_fill_rect_stippled_8x8_blt(DrawablePtr drawable, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20; - b[1] = br13; - b[2] = 0; - b[3] = 0; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; - b[7] = pat[0]; - b[8] = pat[1]; - sna->kgem.nbatch += 9; + if (sna->kgem.gen >= 0100) { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 8; + b[1] = br13; + b[2] = 0; + b[3] = 0; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; + b[8] = pat[0]; + b[9] = pat[1]; + sna->kgem.nbatch += 10; + } else { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 3 << 20 | 7; + b[1] = br13; + b[2] = 0; + b[3] = 0; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; + b[7] = pat[0]; + b[8] = pat[1]; + sna->kgem.nbatch += 9; + } } assert(sna->kgem.mode == KGEM_BLT); @@ -12161,7 +12474,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, if (src_stride <= 128) { src_stride = ALIGN(src_stride, 8) / 4; assert(src_stride <= 32); - if (!kgem_check_batch(&sna->kgem, 7+src_stride) || + if (!kgem_check_batch(&sna->kgem, 8+src_stride) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 1)) { kgem_submit(&sna->kgem); @@ -12172,23 +12485,40 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00; - b[0] |= ((r->x - origin->x) & 7) << 17; - b[1] = br13; - b[2] = (r->y + dy) << 16 | (r->x + dx); - b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx); - b[4] = kgem_add_reloc(&sna->kgem, - sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; + if (sna->kgem.gen >= 0100) { + b[0] = XY_MONO_SRC_COPY_IMM | (6 + src_stride) | br00; + b[0] |= ((r->x - origin->x) & 7) << 17; + b[1] = br13; + b[2] = (r->y + dy) << 16 | (r->x + dx); + b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; - sna->kgem.nbatch += 7 + src_stride; + dst = (uint8_t *)&b[8]; + sna->kgem.nbatch += 8 + src_stride; + } else { + b[0] = 
XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00; + b[0] |= ((r->x - origin->x) & 7) << 17; + b[1] = br13; + b[2] = (r->y + dy) << 16 | (r->x + dx); + b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx); + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; - dst = (uint8_t *)&b[7]; + dst = (uint8_t *)&b[7]; + sna->kgem.nbatch += 7 + src_stride; + } src_stride = stipple->devKind; src = stipple->devPrivate.ptr; src += (r->y - origin->y) * src_stride + bx1/8; @@ -12206,7 +12536,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, struct kgem_bo *upload; void *ptr; - if (!kgem_check_batch(&sna->kgem, 8) || + if (!kgem_check_batch(&sna->kgem, 10) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { kgem_submit(&sna->kgem); @@ -12240,28 +12570,49 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_MONO_SRC_COPY | br00; - b[0] |= ((r->x - origin->x) & 7) << 17; - b[1] = br13; - b[2] = (r->y + dy) << 16 | (r->x + dx); - b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx); - b[4] = kgem_add_reloc(&sna->kgem, - sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, - upload, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - b[6] = gc->bgPixel; - b[7] = gc->fgPixel; + if (sna->kgem.gen >= 0100) { + b[0] = XY_MONO_SRC_COPY | br00 | 8; + b[0] |= ((r->x - origin->x) & 7) << 17; + b[1] = br13; + b[2] = (r->y + dy) << 16 | (r->x + dx); + b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + *(uint64_t *)(b+6) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[8] = gc->bgPixel; + b[9] = gc->fgPixel; + sna->kgem.nbatch += 10; + } else { + b[0] = XY_MONO_SRC_COPY | br00 | 6; + b[0] |= ((r->x - origin->x) & 7) << 17; + b[1] = br13; + b[2] = (r->y + dy) << 16 | (r->x + dx); + b[3] = (r->y + r->height + dy) << 16 | (r->x + r->width + dx); + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; - sna->kgem.nbatch += 8; + sna->kgem.nbatch += 8; + } sigtrap_put(); } + kgem_bo_destroy(&sna->kgem, upload); } @@ -12313,7 +12664,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, if (src_stride <= 128) { src_stride = ALIGN(src_stride, 8) / 4; assert(src_stride <= 32); - if (!kgem_check_batch(&sna->kgem, 7+src_stride) || + if (!kgem_check_batch(&sna->kgem, 8+src_stride) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 1)) { kgem_submit(&sna->kgem); @@ -12324,23 +12675,41 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00; - b[0] |= ((box.x1 - pat.x) & 7) << 17; - b[1] = br13; - b[2] = (box.y1 + dy) << 16 | (box.x1 + dx); - 
b[3] = (box.y2 + dy) << 16 | (box.x2 + dx); - b[4] = kgem_add_reloc(&sna->kgem, - sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; + if (sna->kgem.gen >= 0100) { + b[0] = XY_MONO_SRC_COPY_IMM | (6 + src_stride) | br00; + b[0] |= ((box.x1 - pat.x) & 7) << 17; + b[1] = br13; + b[2] = (box.y1 + dy) << 16 | (box.x1 + dx); + b[3] = (box.y2 + dy) << 16 | (box.x2 + dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; - sna->kgem.nbatch += 7 + src_stride; + dst = (uint8_t *)&b[8]; + sna->kgem.nbatch += 8 + src_stride; + } else { + b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00; + b[0] |= ((box.x1 - pat.x) & 7) << 17; + b[1] = br13; + b[2] = (box.y1 + dy) << 16 | (box.x1 + dx); + b[3] = (box.y2 + dy) << 16 | (box.x2 + dx); + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; + + dst = (uint8_t *)&b[7]; + sna->kgem.nbatch += 7 + src_stride; + } - dst = (uint8_t *)&b[7]; src_stride = stipple->devKind; src = stipple->devPrivate.ptr; src += (box.y1 - pat.y) * src_stride + bx1/8; @@ -12355,7 +12724,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, src += src_stride; } while (--bh); } else { - if (!kgem_check_batch(&sna->kgem, 8) || + if (!kgem_check_batch(&sna->kgem, 10) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { kgem_submit(&sna->kgem); @@ -12389,26 +12758,46 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_MONO_SRC_COPY | br00; - b[0] |= ((box.x1 - pat.x) & 7) << 17; - b[1] = br13; - b[2] = (box.y1 + dy) << 16 | (box.x1 + dx); - b[3] = (box.y2 + dy) << 16 | (box.x2 + dx); - b[4] = kgem_add_reloc(&sna->kgem, - sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, - upload, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - b[6] = gc->bgPixel; - b[7] = gc->fgPixel; + if (sna->kgem.gen >= 0100) { + b[0] = XY_MONO_SRC_COPY | br00 | 8; + b[0] |= ((box.x1 - pat.x) & 7) << 17; + b[1] = br13; + b[2] = (box.y1 + dy) << 16 | (box.x1 + dx); + b[3] = (box.y2 + dy) << 16 | (box.x2 + dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + *(uint64_t *)(b+5) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[8] = gc->bgPixel; + b[9] = gc->fgPixel; + sna->kgem.nbatch += 10; + } else { + b[0] = XY_MONO_SRC_COPY | br00 | 6; + b[0] |= ((box.x1 - pat.x) & 7) << 17; + b[1] = br13; + b[2] = (box.y1 + dy) << 16 | (box.x1 + dx); + b[3] = (box.y2 + dy) << 16 | (box.x2 + dx); + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; - sna->kgem.nbatch += 8; + sna->kgem.nbatch += 8; + } 
sigtrap_put(); } kgem_bo_destroy(&sna->kgem, upload); @@ -12463,7 +12852,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, if (src_stride <= 128) { src_stride = ALIGN(src_stride, 8) / 4; assert(src_stride <= 32); - if (!kgem_check_batch(&sna->kgem, 7+src_stride) || + if (!kgem_check_batch(&sna->kgem, 8+src_stride) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 1)) { kgem_submit(&sna->kgem); @@ -12474,23 +12863,40 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00; - b[0] |= ((box.x1 - pat.x) & 7) << 17; - b[1] = br13; - b[2] = (box.y1 + dy) << 16 | (box.x1 + dx); - b[3] = (box.y2 + dy) << 16 | (box.x2 + dx); - b[4] = kgem_add_reloc(&sna->kgem, - sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; + if (sna->kgem.gen >= 0100) { + b[0] = XY_MONO_SRC_COPY_IMM | (6 + src_stride) | br00; + b[0] |= ((box.x1 - pat.x) & 7) << 17; + b[1] = br13; + b[2] = (box.y1 + dy) << 16 | (box.x1 + dx); + b[3] = (box.y2 + dy) << 16 | (box.x2 + dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; - sna->kgem.nbatch += 7 + src_stride; + dst = (uint8_t *)&b[8]; + sna->kgem.nbatch += 8 + src_stride; + } else { + b[0] = XY_MONO_SRC_COPY_IMM | (5 + src_stride) | br00; + b[0] |= ((box.x1 - pat.x) & 7) << 17; + b[1] = br13; + b[2] = (box.y1 + dy) << 16 | (box.x1 + dx); + b[3] = (box.y2 + dy) << 16 | (box.x2 + dx); + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; - dst = (uint8_t *)&b[7]; + dst = (uint8_t *)&b[7]; + sna->kgem.nbatch += 7 + src_stride; + } src_stride = stipple->devKind; src = stipple->devPrivate.ptr; src += (box.y1 - pat.y) * src_stride + bx1/8; @@ -12505,7 +12911,7 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, src += src_stride; } while (--bh); } else { - if (!kgem_check_batch(&sna->kgem, 8) || + if (!kgem_check_batch(&sna->kgem, 10) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc_and_exec(&sna->kgem, 2)) { kgem_submit(&sna->kgem); @@ -12539,26 +12945,46 @@ sna_poly_fill_rect_stippled_1_blt(DrawablePtr drawable, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_MONO_SRC_COPY | br00; - b[0] |= ((box.x1 - pat.x) & 7) << 17; - b[1] = br13; - b[2] = (box.y1 + dy) << 16 | (box.x1 + dx); - b[3] = (box.y2 + dy) << 16 | (box.x2 + dx); - b[4] = kgem_add_reloc(&sna->kgem, - sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, - upload, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - b[6] = gc->bgPixel; - b[7] = gc->fgPixel; - - sna->kgem.nbatch += 8; + if (sna->kgem.gen >= 0100) { + b[0] = XY_MONO_SRC_COPY | br00 | 8; + b[0] |= ((box.x1 - pat.x) & 7) << 17; + b[1] = br13; + b[2] = (box.y1 + dy) << 16 | (box.x1 + dx); + b[3] = (box.y2 + dy) << 16 | (box.x2 + dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + *(uint64_t 
*)(b+6) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[8] = gc->bgPixel; + b[9] = gc->fgPixel; + sna->kgem.nbatch += 10; + } else { + b[0] = XY_MONO_SRC_COPY | br00 | 6; + b[0] |= ((box.x1 - pat.x) & 7) << 17; + b[1] = br13; + b[2] = (box.y1 + dy) << 16 | (box.x1 + dx); + b[3] = (box.y2 + dy) << 16 | (box.x2 + dx); + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; + + sna->kgem.nbatch += 8; + } sigtrap_put(); } kgem_bo_destroy(&sna->kgem, upload); @@ -12622,7 +13048,7 @@ sna_poly_fill_rect_stippled_n_box__imm(struct sna *sna, len = bw*bh; len = ALIGN(len, 8) / 4; assert(len <= 32); - if (!kgem_check_batch(&sna->kgem, 7+len) || + if (!kgem_check_batch(&sna->kgem, 8+len) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 1)) { kgem_submit(&sna->kgem); @@ -12633,22 +13059,37 @@ sna_poly_fill_rect_stippled_n_box__imm(struct sna *sna, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = br00 | (5 + len) | (ox & 7) << 17; - b[1] = br13; - b[2] = y1 << 16 | x1; - b[3] = y2 << 16 | x2; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, - bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; - - sna->kgem.nbatch += 7 + len; + if (sna->kgem.gen >= 0100) { + b[0] = br00 | (6 + len) | (ox & 7) << 17; + b[1] = br13; + b[2] = y1 << 16 | x1; + b[3] = y2 << 16 | x2; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; + dst = (uint8_t *)&b[8]; + sna->kgem.nbatch += 8 + len; + } else { + b[0] = br00 | (5 + len) | (ox & 7) << 17; + b[1] = br13; + b[2] = y1 << 16 | x1; + b[3] = y2 << 16 | x2; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; + dst = (uint8_t *)&b[7]; + sna->kgem.nbatch += 7 + len; + } - dst = (uint8_t *)&b[7]; len = gc->stipple->devKind; src = gc->stipple->devPrivate.ptr; src += oy*len + ox/8; @@ -12729,7 +13170,7 @@ sna_poly_fill_rect_stippled_n_box(struct sna *sna, len = bw*bh; len = ALIGN(len, 8) / 4; - if (!kgem_check_batch(&sna->kgem, 7+len) || + if (!kgem_check_batch(&sna->kgem, 8+len) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 2)) { kgem_submit(&sna->kgem); @@ -12744,25 +13185,45 @@ sna_poly_fill_rect_stippled_n_box(struct sna *sna, if (!use_tile && len <= 32) { uint8_t *dst, *src; - b[0] = XY_MONO_SRC_COPY_IMM; - b[0] |= (br00 & (BLT_DST_TILED | 3 << 20)); - b[0] |= (ox & 7) << 17; - b[0] |= (5 + len); - b[1] = br13; - b[2] = y1 << 16 | x1; - b[3] = y2 << 16 | x2; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, - bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = gc->bgPixel; - b[6] = gc->fgPixel; + if (sna->kgem.gen >= 0100) { + b[0] = XY_MONO_SRC_COPY_IMM; + b[0] |= (br00 & (BLT_DST_TILED | 3 << 20)); + b[0] |= (ox & 7) << 17; + b[0] |= (6 + len); + b[1] = br13; + b[2] = y1 << 16 | x1; + b[3] = y2 << 16 | x2; + 
*(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; - sna->kgem.nbatch += 7 + len; + dst = (uint8_t *)&b[8]; + sna->kgem.nbatch += 8 + len; + } else { + b[0] = XY_MONO_SRC_COPY_IMM; + b[0] |= (br00 & (BLT_DST_TILED | 3 << 20)); + b[0] |= (ox & 7) << 17; + b[0] |= (5 + len); + b[1] = br13; + b[2] = y1 << 16 | x1; + b[3] = y2 << 16 | x2; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = gc->bgPixel; + b[6] = gc->fgPixel; + + dst = (uint8_t *)&b[7]; + sna->kgem.nbatch += 7 + len; + } - dst = (uint8_t *)&b[7]; len = gc->stipple->devKind; src = gc->stipple->devPrivate.ptr; src += oy*len + ox/8; @@ -12794,25 +13255,43 @@ sna_poly_fill_rect_stippled_n_box(struct sna *sna, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = br00 | (ox & 7) << 17; - b[1] = br13; - b[2] = y1 << 16 | x1; - b[3] = y2 << 16 | x2; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, - bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, - upload, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - b[6] = gc->bgPixel; - b[7] = gc->fgPixel; - - sna->kgem.nbatch += 8; + if (sna->kgem.gen >= 0100) { + b[0] = br00 | (ox & 7) << 17 | 8; + b[1] = br13; + b[2] = y1 << 16 | x1; + b[3] = y2 << 16 | x2; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + *(uint64_t *)(b+6) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[8] = gc->bgPixel; + b[9] = gc->fgPixel; + sna->kgem.nbatch += 10; + } else { + b[0] = br00 | (ox & 7) << 17 | 6; + b[1] = br13; + b[2] = y1 << 16 | x1; + b[3] = y2 << 16 | x2; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; + sna->kgem.nbatch += 8; + } if (!has_tile) { dst = ptr; @@ -13702,7 +14181,7 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc, } kgem_set_mode(&sna->kgem, KGEM_BLT, bo); - if (!kgem_check_batch(&sna->kgem, 16) || + if (!kgem_check_batch(&sna->kgem, 20) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 1)) { kgem_submit(&sna->kgem); @@ -13723,24 +14202,47 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_SETUP_BLT | 3 << 20; - b[1] = bo->pitch; - if (sna->kgem.gen >= 040 && bo->tiling) { - b[0] |= BLT_DST_TILED; - b[1] >>= 2; - } - b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16; - b[2] = extents->y1 << 16 | extents->x1; - b[3] = extents->y2 << 16 | extents->x2; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = bg; - b[6] = fg; - b[7] = 0; - sna->kgem.nbatch += 8; + if (sna->kgem.gen >= 0100) { + b[0] = XY_SETUP_BLT | 3 << 20 | 8; + b[1] = bo->pitch; + if (sna->kgem.gen >= 040 && 
bo->tiling) { + b[0] |= BLT_DST_TILED; + b[1] >>= 2; + } + b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16; + b[2] = extents->y1 << 16 | extents->x1; + b[3] = extents->y2 << 16 | extents->x2; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = bg; + b[7] = fg; + b[8] = 0; + b[9] = 0; + sna->kgem.nbatch += 10; + } else { + b[0] = XY_SETUP_BLT | 3 << 20 | 6; + b[1] = bo->pitch; + if (sna->kgem.gen >= 040 && bo->tiling) { + b[0] |= BLT_DST_TILED; + b[1] >>= 2; + } + b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16; + b[2] = extents->y1 << 16 | extents->x1; + b[3] = extents->y2 << 16 | extents->x2; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = bg; + b[6] = fg; + b[7] = 0; + sna->kgem.nbatch += 8; + } br00 = XY_TEXT_IMMEDIATE_BLT; if (bo->tiling && sna->kgem.gen >= 040) @@ -13786,24 +14288,47 @@ sna_glyph_blt(DrawablePtr drawable, GCPtr gc, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_SETUP_BLT | 3 << 20; - b[1] = bo->pitch; - if (sna->kgem.gen >= 040 && bo->tiling) { - b[0] |= BLT_DST_TILED; - b[1] >>= 2; + if (sna->kgem.gen >= 0100) { + b[0] = XY_SETUP_BLT | 3 << 20 | 8; + b[1] = bo->pitch; + if (bo->tiling) { + b[0] |= BLT_DST_TILED; + b[1] >>= 2; + } + b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16; + b[2] = extents->y1 << 16 | extents->x1; + b[3] = extents->y2 << 16 | extents->x2; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = bg; + b[7] = fg; + b[8] = 0; + b[9] = 0; + sna->kgem.nbatch += 10; + } else { + b[0] = XY_SETUP_BLT | 3 << 20 | 6; + b[1] = bo->pitch; + if (sna->kgem.gen >= 040 && bo->tiling) { + b[0] |= BLT_DST_TILED; + b[1] >>= 2; + } + b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16; + b[2] = extents->y1 << 16 | extents->x1; + b[3] = extents->y2 << 16 | extents->x2; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = bg; + b[6] = fg; + b[7] = 0; + sna->kgem.nbatch += 8; } - b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16; - b[2] = extents->y1 << 16 | extents->x1; - b[3] = extents->y2 << 16 | extents->x2; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = bg; - b[6] = fg; - b[7] = 0; - sna->kgem.nbatch += 8; } assert(sna->kgem.mode == KGEM_BLT); @@ -14370,7 +14895,7 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc, } kgem_set_mode(&sna->kgem, KGEM_BLT, bo); - if (!kgem_check_batch(&sna->kgem, 16) || + if (!kgem_check_batch(&sna->kgem, 20) || !kgem_check_bo_fenced(&sna->kgem, bo) || !kgem_check_reloc(&sna->kgem, 1)) { kgem_submit(&sna->kgem); @@ -14391,24 +14916,47 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_SETUP_BLT | 1 << 20; - b[1] = bo->pitch; - if (sna->kgem.gen >= 040 && bo->tiling) { - b[0] |= BLT_DST_TILED; - b[1] >>= 2; - } - b[1] |= 1 << 30 | transparent << 29 | 
blt_depth(drawable->depth) << 24 | rop << 16; - b[2] = extents->y1 << 16 | extents->x1; - b[3] = extents->y2 << 16 | extents->x2; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = bg; - b[6] = fg; - b[7] = 0; - sna->kgem.nbatch += 8; + if (sna->kgem.gen >= 0100) { + b[0] = XY_SETUP_BLT | 1 << 20 | 8; + b[1] = bo->pitch; + if (sna->kgem.gen >= 040 && bo->tiling) { + b[0] |= BLT_DST_TILED; + b[1] >>= 2; + } + b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16; + b[2] = extents->y1 << 16 | extents->x1; + b[3] = extents->y2 << 16 | extents->x2; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = bg; + b[7] = fg; + b[8] = 0; + b[9] = 0; + sna->kgem.nbatch += 10; + } else { + b[0] = XY_SETUP_BLT | 1 << 20 | 6; + b[1] = bo->pitch; + if (sna->kgem.gen >= 040 && bo->tiling) { + b[0] |= BLT_DST_TILED; + b[1] >>= 2; + } + b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16; + b[2] = extents->y1 << 16 | extents->x1; + b[3] = extents->y2 << 16 | extents->x2; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = bg; + b[6] = fg; + b[7] = 0; + sna->kgem.nbatch += 8; + } do { CharInfoPtr *info = _info; @@ -14476,25 +15024,47 @@ sna_reversed_glyph_blt(DrawablePtr drawable, GCPtr gc, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_SETUP_BLT | 1 << 20; - b[1] = bo->pitch; - if (sna->kgem.gen >= 040 && bo->tiling) { - b[0] |= BLT_DST_TILED; - b[1] >>= 2; + if (sna->kgem.gen >= 0100) { + b[0] = XY_SETUP_BLT | 1 << 20 | 8; + b[1] = bo->pitch; + if (bo->tiling) { + b[0] |= BLT_DST_TILED; + b[1] >>= 2; + } + b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16; + b[2] = extents->y1 << 16 | extents->x1; + b[3] = extents->y2 << 16 | extents->x2; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = bg; + b[7] = fg; + b[8] = 0; + b[9] = 0; + sna->kgem.nbatch += 10; + } else { + b[0] = XY_SETUP_BLT | 1 << 20 | 6; + b[1] = bo->pitch; + if (sna->kgem.gen >= 040 && bo->tiling) { + b[0] |= BLT_DST_TILED; + b[1] >>= 2; + } + b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16; + b[2] = extents->y1 << 16 | extents->x1; + b[3] = extents->y2 << 16 | extents->x2; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = bg; + b[6] = fg; + b[7] = 0; + sna->kgem.nbatch += 8; } - b[1] |= 1 << 30 | transparent << 29 | blt_depth(drawable->depth) << 24 | rop << 16; - b[2] = extents->y1 << 16 | extents->x1; - b[3] = extents->y2 << 16 | extents->x2; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, - bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = bg; - b[6] = fg; - b[7] = 0; - sna->kgem.nbatch += 8; } assert(sna->kgem.mode == KGEM_BLT); @@ -14789,7 +15359,7 @@ sna_push_pixels_solid_blt(GCPtr gc, struct kgem_bo *upload; void *ptr; - if (!kgem_check_batch(&sna->kgem, 8) || + if (!kgem_check_batch(&sna->kgem, 10) || !kgem_check_bo_fenced(&sna->kgem, bo) || 
!kgem_check_reloc_and_exec(&sna->kgem, 2)) { kgem_submit(&sna->kgem); @@ -14827,34 +15397,63 @@ sna_push_pixels_solid_blt(GCPtr gc, assert(sna->kgem.mode == KGEM_BLT); b = sna->kgem.batch + sna->kgem.nbatch; - b[0] = XY_MONO_SRC_COPY | 3 << 20; - b[0] |= ((box->x1 - region->extents.x1) & 7) << 17; - b[1] = bo->pitch; - if (sna->kgem.gen >= 040 && bo->tiling) { - b[0] |= BLT_DST_TILED; - b[1] >>= 2; - } - b[1] |= 1 << 29; - b[1] |= blt_depth(drawable->depth) << 24; - b[1] |= rop << 16; - b[2] = box->y1 << 16 | box->x1; - b[3] = box->y2 << 16 | box->x2; - b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, - upload, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - b[6] = gc->bgPixel; - b[7] = gc->fgPixel; + if (sna->kgem.gen >= 0100) { + b[0] = XY_MONO_SRC_COPY | 3 << 20 | 8; + b[0] |= ((box->x1 - region->extents.x1) & 7) << 17; + b[1] = bo->pitch; + if (sna->kgem.gen >= 040 && bo->tiling) { + b[0] |= BLT_DST_TILED; + b[1] >>= 2; + } + b[1] |= 1 << 29; + b[1] |= blt_depth(drawable->depth) << 24; + b[1] |= rop << 16; + b[2] = box->y1 << 16 | box->x1; + b[3] = box->y2 << 16 | box->x2; + *(uint64_t *)(b+4) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + *(uint64_t *)(b+6) = + kgem_add_reloc64(&sna->kgem, sna->kgem.nbatch + 6, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[8] = gc->bgPixel; + b[9] = gc->fgPixel; + sna->kgem.nbatch += 10; + } else { + b[0] = XY_MONO_SRC_COPY | 3 << 20 | 6; + b[0] |= ((box->x1 - region->extents.x1) & 7) << 17; + b[1] = bo->pitch; + if (sna->kgem.gen >= 040 && bo->tiling) { + b[0] |= BLT_DST_TILED; + b[1] >>= 2; + } + b[1] |= 1 << 29; + b[1] |= blt_depth(drawable->depth) << 24; + b[1] |= rop << 16; + b[2] = box->y1 << 16 | box->x1; + b[3] = box->y2 << 16 | box->x2; + b[4] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = kgem_add_reloc(&sna->kgem, sna->kgem.nbatch + 5, upload, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[6] = gc->bgPixel; + b[7] = gc->fgPixel; - sna->kgem.nbatch += 8; + sna->kgem.nbatch += 8; + } sigtrap_put(); } + kgem_bo_destroy(&sna->kgem, upload); box++; diff --git a/src/sna/sna_blt.c b/src/sna/sna_blt.c index e63b360c..f5abeffc 100644 --- a/src/sna/sna_blt.c +++ b/src/sna/sna_blt.c @@ -158,7 +158,7 @@ static bool sna_blt_fill_init(struct sna *sna, blt->bpp = bpp; kgem_set_mode(kgem, KGEM_BLT, bo); - if (!kgem_check_batch(kgem, 12) || + if (!kgem_check_batch(kgem, 14) || !kgem_check_bo_fenced(kgem, bo)) { kgem_submit(kgem); if (!kgem_check_bo_fenced(kgem, bo)) @@ -181,22 +181,42 @@ static bool sna_blt_fill_init(struct sna *sna, assert(sna->kgem.mode == KGEM_BLT); b = kgem->batch + kgem->nbatch; - b[0] = XY_SETUP_MONO_PATTERN_SL_BLT; - if (bpp == 32) - b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; - b[1] = blt->br13; - b[2] = 0; - b[3] = 0; - b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = pixel; - b[6] = pixel; - b[7] = 0; - b[8] = 0; - kgem->nbatch += 9; + if (sna->kgem.gen >= 0100) { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8; + if (bpp == 32) + b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + b[1] = blt->br13; + b[2] = 0; + b[3] = 0; + *(uint64_t *)(b+4) 
= + kgem_add_reloc64(kgem, kgem->nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = pixel; + b[7] = pixel; + b[8] = 0; + b[9] = 0; + kgem->nbatch += 10; + } else { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7; + if (bpp == 32) + b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + b[1] = blt->br13; + b[2] = 0; + b[3] = 0; + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = pixel; + b[6] = pixel; + b[7] = 0; + b[8] = 0; + kgem->nbatch += 9; + } assert(kgem->nbatch < kgem->surface); sna->blt_state.fill_bo = bo->unique_id; @@ -218,22 +238,42 @@ noinline static void sna_blt_fill_begin(struct sna *sna, assert(kgem->nbatch == 0); b = kgem->batch; - b[0] = XY_SETUP_MONO_PATTERN_SL_BLT; - if (blt->bpp == 32) - b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; - b[1] = blt->br13; - b[2] = 0; - b[3] = 0; - b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0], - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = blt->pixel; - b[6] = blt->pixel; - b[7] = 0; - b[8] = 0; - kgem->nbatch = 9; + if (sna->kgem.gen >= 0100) { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8; + if (blt->bpp == 32) + b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + b[1] = blt->br13; + b[2] = 0; + b[3] = 0; + *(uint64_t *)(b+4) = + kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[0], + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = blt->pixel; + b[7] = blt->pixel; + b[8] = 0; + b[9] = 0; + kgem->nbatch = 10; + } else { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7; + if (blt->bpp == 32) + b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + b[1] = blt->br13; + b[2] = 0; + b[3] = 0; + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[0], + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = blt->pixel; + b[6] = blt->pixel; + b[7] = 0; + b[8] = 0; + kgem->nbatch = 9; + } } inline static void sna_blt_fill_one(struct sna *sna, @@ -279,7 +319,7 @@ static bool sna_blt_copy_init(struct sna *sna, blt->bo[0] = src; blt->bo[1] = dst; - blt->cmd = XY_SRC_COPY_BLT_CMD; + blt->cmd = XY_SRC_COPY_BLT_CMD | (kgem->gen >= 0100 ? 8 : 6); if (bpp == 32) blt->cmd |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; @@ -332,7 +372,7 @@ static bool sna_blt_alpha_fixup_init(struct sna *sna, blt->bo[0] = src; blt->bo[1] = dst; - blt->cmd = XY_FULL_MONO_PATTERN_BLT | (kgem->gen >= 0100 ?
12 : 10); blt->pitch[0] = src->pitch; if (kgem->gen >= 040 && src->tiling) { blt->cmd |= BLT_SRC_TILED; @@ -391,7 +431,7 @@ static void sna_blt_alpha_fixup_one(struct sna *sna, assert(width > 0); assert(height > 0); - if (!kgem_check_batch(kgem, 12) || + if (!kgem_check_batch(kgem, 14) || !kgem_check_reloc(kgem, 2)) { _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); @@ -403,24 +443,43 @@ static void sna_blt_alpha_fixup_one(struct sna *sna, b[1] = blt->br13; b[2] = (dst_y << 16) | dst_x; b[3] = ((dst_y + height) << 16) | (dst_x + width); - b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, - blt->bo[1], - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = blt->pitch[0]; - b[6] = (src_y << 16) | src_x; - b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, - blt->bo[0], - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - b[8] = blt->pixel; - b[9] = blt->pixel; - b[10] = 0; - b[11] = 0; - kgem->nbatch += 12; + if (sna->kgem.gen >= 0100) { + *(uint64_t *)(b+4) = + kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1], + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = blt->pitch[0]; + b[7] = (src_y << 16) | src_x; + *(uint64_t *)(b+8) = + kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0], + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[10] = blt->pixel; + b[11] = blt->pixel; + b[12] = 0; + b[13] = 0; + kgem->nbatch += 14; + } else { + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1], + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = blt->pitch[0]; + b[6] = (src_y << 16) | src_x; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0], + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + b[8] = blt->pixel; + b[9] = blt->pixel; + b[10] = 0; + b[11] = 0; + kgem->nbatch += 12; + } assert(kgem->nbatch < kgem->surface); } @@ -446,34 +505,61 @@ static void sna_blt_copy_one(struct sna *sna, assert(height > 0); /* Compare against a previous fill */ - if (kgem->nbatch >= 6 && - blt->overwrites && - kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB))) && - kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) && - kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width)) && + if (blt->overwrites && kgem->reloc[kgem->nreloc-1].target_handle == blt->bo[1]->target_handle) { - DBG(("%s: replacing last fill\n", __FUNCTION__)); - if (kgem_check_batch(kgem, 8-6)) { - assert(sna->kgem.mode == KGEM_BLT); - b = kgem->batch + kgem->nbatch - 6; - b[0] = blt->cmd; - b[1] = blt->br13; - b[5] = (src_y << 16) | src_x; - b[6] = blt->pitch[0]; - b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6, - blt->bo[0], - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - kgem->nbatch += 8 - 6; - assert(kgem->nbatch < kgem->surface); - return; + if (sna->kgem.gen >= 0100) { + if (kgem->nbatch >= 7 && + kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (blt->cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) && + kgem->batch[kgem->nbatch-5] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) && + kgem->batch[kgem->nbatch-4] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) { + DBG(("%s: replacing last fill\n", __FUNCTION__)); + if (kgem_check_batch(kgem, 3)) { + assert(kgem->mode == KGEM_BLT); + b = kgem->batch + kgem->nbatch - 7; + b[0] = blt->cmd; + b[1] = blt->br13; + b[6] = (src_y << 16) | src_x; + b[7] = blt->pitch[0]; + *(uint64_t *)(b+8) = + 
kgem_add_reloc64(kgem, kgem->nbatch + 8 - 7, blt->bo[0], + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 3; + assert(kgem->nbatch < kgem->surface); + return; + } + kgem->nbatch -= 7; + kgem->nreloc--; + } + } else { + if (kgem->nbatch >= 6 && + kgem->batch[kgem->nbatch-6] == (XY_COLOR_BLT | (blt->cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) && + kgem->batch[kgem->nbatch-4] == ((uint32_t)dst_y << 16 | (uint16_t)dst_x) && + kgem->batch[kgem->nbatch-3] == ((uint32_t)(dst_y+height) << 16 | (uint16_t)(dst_x+width))) { + DBG(("%s: replacing last fill\n", __FUNCTION__)); + if (kgem_check_batch(kgem, 8-6)) { + assert(kgem->mode == KGEM_BLT); + b = kgem->batch + kgem->nbatch - 6; + b[0] = blt->cmd; + b[1] = blt->br13; + b[5] = (src_y << 16) | src_x; + b[6] = blt->pitch[0]; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7 - 6, blt->bo[0], + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8 - 6; + assert(kgem->nbatch < kgem->surface); + return; + } + kgem->nbatch -= 6; + kgem->nreloc--; + } } - kgem->nbatch -= 6; - kgem->nreloc--; } - if (!kgem_check_batch(kgem, 8) || + if (!kgem_check_batch(kgem, 10) || !kgem_check_reloc(kgem, 2)) { _kgem_submit(kgem); _kgem_set_mode(kgem, KGEM_BLT); @@ -485,20 +571,35 @@ static void sna_blt_copy_one(struct sna *sna, b[1] = blt->br13; b[2] = (dst_y << 16) | dst_x; b[3] = ((dst_y + height) << 16) | (dst_x + width); - b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, - blt->bo[1], - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = (src_y << 16) | src_x; - b[6] = blt->pitch[0]; - b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, - blt->bo[0], - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - kgem->nbatch += 8; + if (kgem->gen >= 0100) { + *(uint64_t *)(b+4) = + kgem_add_reloc64(kgem, kgem->nbatch + 4, blt->bo[1], + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = (src_y << 16) | src_x; + b[7] = blt->pitch[0]; + *(uint64_t *)(b+8) = + kgem_add_reloc64(kgem, kgem->nbatch + 8, blt->bo[0], + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 10; + } else { + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, blt->bo[1], + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = (src_y << 16) | src_x; + b[6] = blt->pitch[0]; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, blt->bo[0], + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8; + } assert(kgem->nbatch < kgem->surface); } @@ -1472,6 +1573,140 @@ static void blt_composite_copy_boxes__thread(struct sna *sna, sna_vertex_unlock(&sna->render); } +static void blt_composite_copy_boxes__thread64(struct sna *sna, + const struct sna_composite_op *op, + const BoxRec *box, int nbox) +{ + struct kgem *kgem = &sna->kgem; + int dst_dx = op->dst.x; + int dst_dy = op->dst.y; + int src_dx = op->src.offset[0]; + int src_dy = op->src.offset[1]; + uint32_t cmd = op->u.blt.cmd; + uint32_t br13 = op->u.blt.br13; + struct kgem_bo *src_bo = op->u.blt.bo[0]; + struct kgem_bo *dst_bo = op->u.blt.bo[1]; + int src_pitch = op->u.blt.pitch[0]; + + DBG(("%s: nbox=%d\n", __FUNCTION__, nbox)); + + sna_vertex_lock(&sna->render); + + if ((dst_dx | dst_dy) == 0) { + uint64_t hdr = (uint64_t)br13 << 32 | cmd; + do { + int nbox_this_time; + + nbox_this_time = nbox; + if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10; + if 
(2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + assert(kgem->mode == KGEM_BLT); + do { + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG((" %s: box=(%d, %d)x(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, box->y2 - box->y1)); + + assert(box->x1 + src_dx >= 0); + assert(box->y1 + src_dy >= 0); + assert(box->x1 + src_dx <= INT16_MAX); + assert(box->y1 + src_dy <= INT16_MAX); + + assert(box->x1 >= 0); + assert(box->y1 >= 0); + + *(uint64_t *)&b[0] = hdr; + *(uint64_t *)&b[2] = *(const uint64_t *)box; + *(uint64_t *)(b+4) = + kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = add2(b[2], src_dx, src_dy); + b[7] = src_pitch; + *(uint64_t *)(b+8) = + kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 10; + assert(kgem->nbatch < kgem->surface); + box++; + } while (--nbox_this_time); + + if (!nbox) + break; + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } while (1); + } else { + do { + int nbox_this_time; + + nbox_this_time = nbox; + if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 10; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + assert(kgem->mode == KGEM_BLT); + do { + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG((" %s: box=(%d, %d)x(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, box->y2 - box->y1)); + + assert(box->x1 + src_dx >= 0); + assert(box->y1 + src_dy >= 0); + + assert(box->x1 + dst_dx >= 0); + assert(box->y1 + dst_dy >= 0); + + b[0] = cmd; + b[1] = br13; + b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx); + b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx); + b[7] = src_pitch; + *(uint64_t *)(b+8) = + kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 10; + assert(kgem->nbatch < kgem->surface); + box++; + } while (--nbox_this_time); + + if (!nbox) + break; + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } while (1); + } + sna_vertex_unlock(&sna->render); +} + fastcall static void blt_composite_copy_with_alpha(struct sna *sna, const struct sna_composite_op *op, @@ -1594,7 +1829,10 @@ prepare_blt_copy(struct sna *sna, op->blt = blt_composite_copy; op->box = blt_composite_copy_box; op->boxes = blt_composite_copy_boxes; - op->thread_boxes = blt_composite_copy_boxes__thread; + if (sna->kgem.gen >= 0100) + op->thread_boxes = blt_composite_copy_boxes__thread64; + else + op->thread_boxes = blt_composite_copy_boxes__thread; if (!sna_blt_copy_init(sna, &op->u.blt, bo, op->dst.bo, src->drawable.bitsPerPixel, @@ -2597,7 +2835,7 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu, assert(box->x1 >= 0); assert(box->y1 >= 0); - cmd = XY_COLOR_BLT; + cmd = XY_COLOR_BLT | (kgem->gen >= 0100 ? 
5 : 4); br13 = bo->pitch; if (kgem->gen >= 040 && bo->tiling) { cmd |= BLT_DST_TILED; @@ -2616,29 +2854,58 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu, /* All too frequently one blt completely overwrites the previous */ overwrites = alu == GXcopy || alu == GXclear || alu == GXset; - if (overwrites && kgem->nbatch >= 6 && - kgem->batch[kgem->nbatch-6] == cmd && - *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box && - kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) { - DBG(("%s: replacing last fill\n", __FUNCTION__)); - kgem->batch[kgem->nbatch-5] = br13; - kgem->batch[kgem->nbatch-1] = color; - return true; - } - if (overwrites && kgem->nbatch >= 8 && - (kgem->batch[kgem->nbatch-8] & 0xffc0000f) == XY_SRC_COPY_BLT_CMD && - *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box && - kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) { - DBG(("%s: replacing last copy\n", __FUNCTION__)); - kgem->batch[kgem->nbatch-8] = cmd; - kgem->batch[kgem->nbatch-7] = br13; - kgem->batch[kgem->nbatch-3] = color; - /* Keep the src bo as part of the execlist, just remove - * its relocation entry. - */ - kgem->nreloc--; - kgem->nbatch -= 2; - return true; + if (overwrites) { + if (sna->kgem.gen >= 0100) { + if (kgem->nbatch >= 7 && + kgem->batch[kgem->nbatch-7] == cmd && + *(uint64_t *)&kgem->batch[kgem->nbatch-5] == *(const uint64_t *)box && + kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) { + DBG(("%s: replacing last fill\n", __FUNCTION__)); + kgem->batch[kgem->nbatch-6] = br13; + kgem->batch[kgem->nbatch-1] = color; + return true; + } + if (kgem->nbatch >= 10 && + (kgem->batch[kgem->nbatch-10] & 0xffc0000f) == XY_SRC_COPY_BLT_CMD && + *(uint64_t *)&kgem->batch[kgem->nbatch-8] == *(const uint64_t *)box && + kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) { + DBG(("%s: replacing last copy\n", __FUNCTION__)); + kgem->batch[kgem->nbatch-10] = cmd; + kgem->batch[kgem->nbatch-8] = br13; + kgem->batch[kgem->nbatch-4] = color; + /* Keep the src bo as part of the execlist, just remove + * its relocation entry. + */ + kgem->nreloc--; + kgem->nbatch -= 3; + return true; + } + } else { + if (kgem->nbatch >= 6 && + kgem->batch[kgem->nbatch-6] == cmd && + *(uint64_t *)&kgem->batch[kgem->nbatch-4] == *(const uint64_t *)box && + kgem->reloc[kgem->nreloc-1].target_handle == bo->target_handle) { + DBG(("%s: replacing last fill\n", __FUNCTION__)); + kgem->batch[kgem->nbatch-5] = br13; + kgem->batch[kgem->nbatch-1] = color; + return true; + } + if (kgem->nbatch >= 8 && + (kgem->batch[kgem->nbatch-8] & 0xffc0000f) == XY_SRC_COPY_BLT_CMD && + *(uint64_t *)&kgem->batch[kgem->nbatch-6] == *(const uint64_t *)box && + kgem->reloc[kgem->nreloc-2].target_handle == bo->target_handle) { + DBG(("%s: replacing last copy\n", __FUNCTION__)); + kgem->batch[kgem->nbatch-8] = cmd; + kgem->batch[kgem->nbatch-7] = br13; + kgem->batch[kgem->nbatch-3] = color; + /* Keep the src bo as part of the execlist, just remove + * its relocation entry. 
+ */ + kgem->nreloc--; + kgem->nbatch -= 2; + return true; + } + } } /* If we are currently emitting SCANLINES, keep doing so */ @@ -2652,7 +2919,7 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu, } kgem_set_mode(kgem, KGEM_BLT, bo); - if (!kgem_check_batch(kgem, 6) || + if (!kgem_check_batch(kgem, 7) || !kgem_check_reloc(kgem, 1) || !kgem_check_bo_fenced(kgem, bo)) { kgem_submit(kgem); @@ -2670,13 +2937,24 @@ static bool sna_blt_fill_box(struct sna *sna, uint8_t alu, b[0] = cmd; b[1] = br13; *(uint64_t *)(b+2) = *(const uint64_t *)box; - b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = color; - kgem->nbatch += 6; + if (kgem->gen >= 0100) { + *(uint64_t *)(b+4) = + kgem_add_reloc64(kgem, kgem->nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = color; + kgem->nbatch += 7; + } else { + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = color; + kgem->nbatch += 6; + } assert(kgem->nbatch < kgem->surface); sna->blt_state.fill_bo = bo->unique_id; @@ -2734,7 +3012,7 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, } kgem_set_mode(kgem, KGEM_BLT, bo); - if (!kgem_check_batch(kgem, 12) || + if (!kgem_check_batch(kgem, 14) || !kgem_check_bo_fenced(kgem, bo)) { kgem_submit(kgem); if (!kgem_check_bo_fenced(&sna->kgem, bo)) @@ -2757,22 +3035,42 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, assert(sna->kgem.mode == KGEM_BLT); b = kgem->batch + kgem->nbatch; - b[0] = XY_SETUP_MONO_PATTERN_SL_BLT; - if (bpp == 32) - b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; - b[1] = br13; - b[2] = 0; - b[3] = 0; - b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = pixel; - b[6] = pixel; - b[7] = 0; - b[8] = 0; - kgem->nbatch += 9; + if (kgem->gen >= 0100) { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8; + if (bpp == 32) + b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + b[1] = br13; + b[2] = 0; + b[3] = 0; + *(uint64_t *)(b+4) = + kgem_add_reloc64(kgem, kgem->nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = pixel; + b[7] = pixel; + b[8] = 0; + b[9] = 0; + kgem->nbatch += 10; + } else { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7; + if (bpp == 32) + b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + b[1] = br13; + b[2] = 0; + b[3] = 0; + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = pixel; + b[6] = pixel; + b[7] = 0; + b[8] = 0; + kgem->nbatch += 9; + } assert(kgem->nbatch < kgem->surface); sna->blt_state.fill_bo = bo->unique_id; @@ -2819,22 +3117,42 @@ bool sna_blt_fill_boxes(struct sna *sna, uint8_t alu, assert(sna->kgem.mode == KGEM_BLT); b = kgem->batch + kgem->nbatch; - b[0] = XY_SETUP_MONO_PATTERN_SL_BLT; - if (bpp == 32) - b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; - b[1] = br13; - b[2] = 0; - b[3] = 0; - b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = pixel; - b[6] = pixel; - b[7] = 0; - b[8] = 0; - kgem->nbatch += 9; + if (kgem->gen >= 0100) { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 8; + if (bpp == 32) + b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + b[1] = br13; + b[2] = 0; + b[3] = 0; + *(uint64_t *)(b+4) = + 
kgem_add_reloc64(kgem, kgem->nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = pixel; + b[7] = pixel; + b[8] = 0; + b[9] = 0; + kgem->nbatch += 10; + } else { + b[0] = XY_SETUP_MONO_PATTERN_SL_BLT | 7; + if (bpp == 32) + b[0] |= BLT_WRITE_ALPHA | BLT_WRITE_RGB; + b[1] = br13; + b[2] = 0; + b[3] = 0; + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = pixel; + b[6] = pixel; + b[7] = 0; + b[8] = 0; + kgem->nbatch += 9; + } assert(kgem->nbatch < kgem->surface); } } while (nbox); @@ -2897,19 +3215,31 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, } /* Compare first box against a previous fill */ - if (kgem->nbatch >= 6 && - (alu == GXcopy || alu == GXclear || alu == GXset) && - kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle && - kgem->batch[kgem->nbatch-6] == ((cmd & ~XY_SRC_COPY_BLT_CMD) | XY_COLOR_BLT) && - kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) && - kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) { - DBG(("%s: deleting last fill\n", __FUNCTION__)); - kgem->nbatch -= 6; - kgem->nreloc--; + if ((alu == GXcopy || alu == GXclear || alu == GXset) && + kgem->reloc[kgem->nreloc-1].target_handle == dst_bo->target_handle) { + if (kgem->gen >= 0100) { + if (kgem->nbatch >= 7 && + kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 5) && + kgem->batch[kgem->nbatch-5] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) && + kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) { + DBG(("%s: deleting last fill\n", __FUNCTION__)); + kgem->nbatch -= 7; + kgem->nreloc--; + } + } else { + if (kgem->nbatch >= 6 && + kgem->batch[kgem->nbatch-7] == (XY_COLOR_BLT | (cmd & (BLT_WRITE_ALPHA | BLT_WRITE_RGB)) | 4) && + kgem->batch[kgem->nbatch-4] == ((uint32_t)(box->y1 + dst_dy) << 16 | (uint16_t)(box->x1 + dst_dx)) && + kgem->batch[kgem->nbatch-3] == ((uint32_t)(box->y2 + dst_dy) << 16 | (uint16_t)(box->x2 + dst_dx))) { + DBG(("%s: deleting last fill\n", __FUNCTION__)); + kgem->nbatch -= 6; + kgem->nreloc--; + } + } } kgem_set_mode(kgem, KGEM_BLT, dst_bo); - if (!kgem_check_batch(kgem, 8) || + if (!kgem_check_batch(kgem, 10) || !kgem_check_reloc(kgem, 2) || !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) { kgem_submit(kgem); @@ -2922,112 +3252,229 @@ bool sna_blt_copy_boxes(struct sna *sna, uint8_t alu, } if ((dst_dx | dst_dy) == 0) { - uint64_t hdr = (uint64_t)br13 << 32 | cmd; - do { - int nbox_this_time; - - nbox_this_time = nbox; - if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) - nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; - if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) - nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; - assert(nbox_this_time); - nbox -= nbox_this_time; - - assert(sna->kgem.mode == KGEM_BLT); + if (kgem->gen >= 0100) { + uint64_t hdr = (uint64_t)br13 << 32 | cmd | 8; do { - uint32_t *b = kgem->batch + kgem->nbatch; - - DBG((" %s: box=(%d, %d)x(%d, %d)\n", - __FUNCTION__, - box->x1, box->y1, - box->x2 - box->x1, box->y2 - box->y1)); - - assert(box->x1 + src_dx >= 0); - assert(box->y1 + src_dy >= 0); - assert(box->x1 + src_dx <= INT16_MAX); - assert(box->y1 + src_dy <= INT16_MAX); - - assert(box->x1 >= 0); 
- assert(box->y1 >= 0); - - *(uint64_t *)&b[0] = hdr; - *(uint64_t *)&b[2] = *(const uint64_t *)box; - b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = add2(b[2], src_dx, src_dy); - b[6] = src_pitch; - b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - kgem->nbatch += 8; - assert(kgem->nbatch < kgem->surface); - box++; - } while (--nbox_this_time); - - if (!nbox) - break; - - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); - } while (1); + int nbox_this_time; + + nbox_this_time = nbox; + if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + assert(sna->kgem.mode == KGEM_BLT); + do { + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG((" %s: box=(%d, %d)x(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, box->y2 - box->y1)); + + assert(box->x1 + src_dx >= 0); + assert(box->y1 + src_dy >= 0); + assert(box->x1 + src_dx <= INT16_MAX); + assert(box->y1 + src_dy <= INT16_MAX); + + assert(box->x1 >= 0); + assert(box->y1 >= 0); + + *(uint64_t *)&b[0] = hdr; + *(uint64_t *)&b[2] = *(const uint64_t *)box; + *(uint64_t *)(b+4) = + kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = add2(b[2], src_dx, src_dy); + b[7] = src_pitch; + *(uint64_t *)(b+8) = + kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 10; + assert(kgem->nbatch < kgem->surface); + box++; + } while (--nbox_this_time); + + if (!nbox) + break; + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } while (1); + } else { + uint64_t hdr = (uint64_t)br13 << 32 | cmd | 6; + do { + int nbox_this_time; + + nbox_this_time = nbox; + if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + assert(sna->kgem.mode == KGEM_BLT); + do { + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG((" %s: box=(%d, %d)x(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, box->y2 - box->y1)); + + assert(box->x1 + src_dx >= 0); + assert(box->y1 + src_dy >= 0); + assert(box->x1 + src_dx <= INT16_MAX); + assert(box->y1 + src_dy <= INT16_MAX); + + assert(box->x1 >= 0); + assert(box->y1 >= 0); + + *(uint64_t *)&b[0] = hdr; + *(uint64_t *)&b[2] = *(const uint64_t *)box; + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = add2(b[2], src_dx, src_dy); + b[6] = src_pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8; + assert(kgem->nbatch < kgem->surface); + box++; + } while (--nbox_this_time); + + if (!nbox) + break; + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } while (1); + } } else { - do { - int nbox_this_time; - - nbox_this_time = nbox; - if 
(8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) - nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; - if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) - nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; - assert(nbox_this_time); - nbox -= nbox_this_time; - - assert(sna->kgem.mode == KGEM_BLT); + if (kgem->gen >= 0100) { + cmd |= 8; do { - uint32_t *b = kgem->batch + kgem->nbatch; - - DBG((" %s: box=(%d, %d)x(%d, %d)\n", - __FUNCTION__, - box->x1, box->y1, - box->x2 - box->x1, box->y2 - box->y1)); - - assert(box->x1 + src_dx >= 0); - assert(box->y1 + src_dy >= 0); - - assert(box->x1 + dst_dx >= 0); - assert(box->y1 + dst_dy >= 0); - - b[0] = cmd; - b[1] = br13; - b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx); - b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx); - b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx); - b[6] = src_pitch; - b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - kgem->nbatch += 8; - assert(kgem->nbatch < kgem->surface); - box++; - } while (--nbox_this_time); - - if (!nbox) - break; - - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); - } while (1); + int nbox_this_time; + + nbox_this_time = nbox; + if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + assert(sna->kgem.mode == KGEM_BLT); + do { + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG((" %s: box=(%d, %d)x(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, box->y2 - box->y1)); + + assert(box->x1 + src_dx >= 0); + assert(box->y1 + src_dy >= 0); + + assert(box->x1 + dst_dx >= 0); + assert(box->y1 + dst_dy >= 0); + + b[0] = cmd; + b[1] = br13; + b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx); + b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx); + b[7] = src_pitch; + *(uint64_t *)(b+8) = + kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 10; + assert(kgem->nbatch < kgem->surface); + box++; + } while (--nbox_this_time); + + if (!nbox) + break; + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } while (1); + } else { + cmd |= 6; + do { + int nbox_this_time; + + nbox_this_time = nbox; + if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc)/2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + assert(sna->kgem.mode == KGEM_BLT); + do { + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG((" %s: box=(%d, %d)x(%d, %d)\n", + __FUNCTION__, + box->x1, box->y1, + box->x2 - box->x1, box->y2 - box->y1)); + + assert(box->x1 + src_dx >= 0); + assert(box->y1 + src_dy >= 0); + + assert(box->x1 + dst_dx >= 0); 
+ assert(box->y1 + dst_dy >= 0); + + b[0] = cmd; + b[1] = br13; + b[2] = ((box->y1 + dst_dy) << 16) | (box->x1 + dst_dx); + b[3] = ((box->y2 + dst_dy) << 16) | (box->x2 + dst_dx); + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = ((box->y1 + src_dy) << 16) | (box->x1 + src_dx); + b[6] = src_pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8; + assert(kgem->nbatch < kgem->surface); + box++; + } while (--nbox_this_time); + + if (!nbox) + break; + + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } while (1); + } } if (kgem->nexec > 1 && __kgem_ring_empty(kgem)) { diff --git a/src/sna/sna_io.c b/src/sna/sna_io.c index 5d238f48..b5e4f376 100644 --- a/src/sna/sna_io.c +++ b/src/sna/sna_io.c @@ -461,7 +461,7 @@ fallback: } kgem_set_mode(kgem, KGEM_BLT, dst_bo); - if (!kgem_check_batch(kgem, 8) || + if (!kgem_check_batch(kgem, 10) || !kgem_check_reloc_and_exec(kgem, 2) || !kgem_check_many_bo_fenced(kgem, dst_bo, src_bo, NULL)) { kgem_submit(kgem); @@ -473,59 +473,123 @@ fallback: tmp_nbox = nbox; tmp_box = box; offset = 0; - do { - int nbox_this_time; - - nbox_this_time = tmp_nbox; - if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) - nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; - if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) - nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; - assert(nbox_this_time); - tmp_nbox -= nbox_this_time; - - for (n = 0; n < nbox_this_time; n++) { - int height = tmp_box[n].y2 - tmp_box[n].y1; - int width = tmp_box[n].x2 - tmp_box[n].x1; - int pitch = PITCH(width, cpp); - uint32_t *b = kgem->batch + kgem->nbatch; - - DBG((" blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n", - offset, tmp_box[n].x1, tmp_box[n].y1, - width, height, pitch)); - - assert(tmp_box[n].x1 >= 0); - assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch); - assert(tmp_box[n].y1 >= 0); - assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo)); - - b[0] = cmd; - b[1] = br13 | pitch; - b[2] = 0; - b[3] = height << 16 | width; - b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - offset); - b[5] = tmp_box[n].y1 << 16 | tmp_box[n].x1; - b[6] = src_pitch; - b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - 0); - kgem->nbatch += 8; - - offset += pitch * height; - } + if (sna->kgem.gen >= 0100) { + cmd |= 8; + do { + int nbox_this_time; + + nbox_this_time = tmp_nbox; + if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; + assert(nbox_this_time); + tmp_nbox -= nbox_this_time; + + assert(kgem->mode == KGEM_BLT); + for (n = 0; n < nbox_this_time; n++) { + int height = tmp_box[n].y2 - tmp_box[n].y1; + int width = tmp_box[n].x2 - tmp_box[n].x1; + int pitch = PITCH(width, cpp); + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG((" blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n", + offset, + tmp_box[n].x1, tmp_box[n].y1, + width, height, pitch)); + + assert(tmp_box[n].x1 >= 0); + assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch); + 
assert(tmp_box[n].y1 >= 0); + assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo)); + + b[0] = cmd; + b[1] = br13 | pitch; + b[2] = 0; + b[3] = height << 16 | width; + *(uint64_t *)(b+4) = + kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + offset); + b[6] = tmp_box[n].y1 << 16 | tmp_box[n].x1; + b[7] = src_pitch; + *(uint64_t *)(b+8) = + kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 10; + + offset += pitch * height; + } - _kgem_submit(kgem); - if (!tmp_nbox) - break; + _kgem_submit(kgem); + if (!tmp_nbox) + break; - _kgem_set_mode(kgem, KGEM_BLT); - tmp_box += nbox_this_time; - } while (1); + _kgem_set_mode(kgem, KGEM_BLT); + tmp_box += nbox_this_time; + } while (1); + } else { + cmd |= 6; + do { + int nbox_this_time; + + nbox_this_time = tmp_nbox; + if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; + assert(nbox_this_time); + tmp_nbox -= nbox_this_time; + + assert(kgem->mode == KGEM_BLT); + for (n = 0; n < nbox_this_time; n++) { + int height = tmp_box[n].y2 - tmp_box[n].y1; + int width = tmp_box[n].x2 - tmp_box[n].x1; + int pitch = PITCH(width, cpp); + uint32_t *b = kgem->batch + kgem->nbatch; + + DBG((" blt offset %x: (%d, %d) x (%d, %d), pitch=%d\n", + offset, + tmp_box[n].x1, tmp_box[n].y1, + width, height, pitch)); + + assert(tmp_box[n].x1 >= 0); + assert(tmp_box[n].x2 * dst->drawable.bitsPerPixel/8 <= src_bo->pitch); + assert(tmp_box[n].y1 >= 0); + assert(tmp_box[n].y2 * src_bo->pitch <= kgem_bo_size(src_bo)); + + b[0] = cmd; + b[1] = br13 | pitch; + b[2] = 0; + b[3] = height << 16 | width; + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + offset); + b[5] = tmp_box[n].y1 << 16 | tmp_box[n].x1; + b[6] = src_pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + 0); + kgem->nbatch += 8; + + offset += pitch * height; + } + + _kgem_submit(kgem); + if (!tmp_nbox) + break; + + _kgem_set_mode(kgem, KGEM_BLT); + tmp_box += nbox_this_time; + } while (1); + } assert(offset == __kgem_buffer_size(dst_bo)); kgem_buffer_read_sync(kgem, dst_bo); @@ -924,7 +988,7 @@ tile: } kgem_set_mode(kgem, KGEM_BLT, dst_bo); - if (!kgem_check_batch(kgem, 8) || + if (!kgem_check_batch(kgem, 10) || !kgem_check_reloc_and_exec(kgem, 2) || !kgem_check_bo_fenced(kgem, dst_bo)) { kgem_submit(kgem); @@ -933,91 +997,185 @@ tile: _kgem_set_mode(kgem, KGEM_BLT); } - do { - int nbox_this_time; - - nbox_this_time = nbox; - if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) - nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; - if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) - nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; - assert(nbox_this_time); - nbox -= nbox_this_time; - - /* Count the total number of bytes to be read and allocate a - * single buffer large enough. Or if it is very small, combine - * with other allocations. 
*/ - offset = 0; - for (n = 0; n < nbox_this_time; n++) { - int height = box[n].y2 - box[n].y1; - int width = box[n].x2 - box[n].x1; - offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height; - } + if (kgem->gen >= 0100) { + cmd |= 8; + do { + int nbox_this_time; + + nbox_this_time = nbox; + if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + /* Count the total number of bytes to be read and allocate a + * single buffer large enough. Or if it is very small, combine + * with other allocations. */ + offset = 0; + for (n = 0; n < nbox_this_time; n++) { + int height = box[n].y2 - box[n].y1; + int width = box[n].x2 - box[n].x1; + offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height; + } - src_bo = kgem_create_buffer(kgem, offset, - KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0), - &ptr); - if (!src_bo) - break; + src_bo = kgem_create_buffer(kgem, offset, + KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0), + &ptr); + if (!src_bo) + break; + + offset = 0; + do { + int height = box->y2 - box->y1; + int width = box->x2 - box->x1; + int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3); + uint32_t *b; - offset = 0; + DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n", + __FUNCTION__, + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + width, height, + offset, pitch)); + + assert(box->x1 + src_dx >= 0); + assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride); + assert(box->y1 + src_dy >= 0); + + assert(box->x1 + dst_dx >= 0); + assert(box->y1 + dst_dy >= 0); + + memcpy_blt(src, (char *)ptr + offset, + dst->drawable.bitsPerPixel, + stride, pitch, + box->x1 + src_dx, box->y1 + src_dy, + 0, 0, + width, height); + + assert(kgem->mode == KGEM_BLT); + b = kgem->batch + kgem->nbatch; + b[0] = cmd; + b[1] = br13; + b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); + b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = 0; + b[7] = pitch; + *(uint64_t *)(b+8) = + kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + offset); + kgem->nbatch += 10; + + box++; + offset += pitch * height; + } while (--nbox_this_time); + assert(offset == __kgem_buffer_size(src_bo)); + + if (nbox) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } + + kgem_bo_destroy(kgem, src_bo); + } while (nbox); + } else { + cmd |= 6; do { - int height = box->y2 - box->y1; - int width = box->x2 - box->x1; - int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3); - uint32_t *b; - - DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n", - __FUNCTION__, - box->x1 + src_dx, box->y1 + src_dy, - box->x1 + dst_dx, box->y1 + dst_dy, - width, height, - offset, pitch)); - - assert(box->x1 + src_dx >= 0); - assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride); - assert(box->y1 + src_dy >= 0); - - assert(box->x1 + dst_dx >= 0); - assert(box->y1 + dst_dy >= 0); - - memcpy_blt(src, (char *)ptr + offset, - dst->drawable.bitsPerPixel, - stride, pitch, - box->x1 + src_dx, box->y1 + 
src_dy, - 0, 0, - width, height); - - b = kgem->batch + kgem->nbatch; - b[0] = cmd; - b[1] = br13; - b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); - b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); - b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = 0; - b[6] = pitch; - b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - offset); - kgem->nbatch += 8; + int nbox_this_time; + + nbox_this_time = nbox; + if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + /* Count the total number of bytes to be read and allocate a + * single buffer large enough. Or if it is very small, combine + * with other allocations. */ + offset = 0; + for (n = 0; n < nbox_this_time; n++) { + int height = box[n].y2 - box[n].y1; + int width = box[n].x2 - box[n].x1; + offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height; + } - box++; - offset += pitch * height; - } while (--nbox_this_time); - assert(offset == __kgem_buffer_size(src_bo)); + src_bo = kgem_create_buffer(kgem, offset, + KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0), + &ptr); + if (!src_bo) + break; - if (nbox) { - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); - } + offset = 0; + do { + int height = box->y2 - box->y1; + int width = box->x2 - box->x1; + int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3); + uint32_t *b; + + DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n", + __FUNCTION__, + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + width, height, + offset, pitch)); + + assert(box->x1 + src_dx >= 0); + assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride); + assert(box->y1 + src_dy >= 0); + + assert(box->x1 + dst_dx >= 0); + assert(box->y1 + dst_dy >= 0); + + memcpy_blt(src, (char *)ptr + offset, + dst->drawable.bitsPerPixel, + stride, pitch, + box->x1 + src_dx, box->y1 + src_dy, + 0, 0, + width, height); + + assert(kgem->mode == KGEM_BLT); + b = kgem->batch + kgem->nbatch; + b[0] = cmd; + b[1] = br13; + b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); + b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = 0; + b[6] = pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + offset); + kgem->nbatch += 8; + + box++; + offset += pitch * height; + } while (--nbox_this_time); + assert(offset == __kgem_buffer_size(src_bo)); + + if (nbox) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } - kgem_bo_destroy(kgem, src_bo); - } while (nbox); + kgem_bo_destroy(kgem, src_bo); + } while (nbox); + } sna->blt_state.fill_bo = 0; return true; @@ -1315,7 +1473,7 @@ tile: } kgem_set_mode(kgem, KGEM_BLT, dst_bo); - if (!kgem_check_batch(kgem, 8) || + if (!kgem_check_batch(kgem, 10) || !kgem_check_reloc_and_exec(kgem, 2) || !kgem_check_bo_fenced(kgem, dst_bo)) { kgem_submit(kgem); @@ -1324,92 +1482,187 @@ tile: _kgem_set_mode(kgem, KGEM_BLT); } - do { - int nbox_this_time; - - nbox_this_time = 
nbox; - if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) - nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; - if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) - nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; - assert(nbox_this_time); - nbox -= nbox_this_time; - - /* Count the total number of bytes to be read and allocate a - * single buffer large enough. Or if it is very small, combine - * with other allocations. */ - offset = 0; - for (n = 0; n < nbox_this_time; n++) { - int height = box[n].y2 - box[n].y1; - int width = box[n].x2 - box[n].x1; - offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height; - } + if (sna->kgem.gen >= 0100) { + cmd |= 8; + do { + int nbox_this_time; + + nbox_this_time = nbox; + if (10*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + /* Count the total number of bytes to be read and allocate a + * single buffer large enough. Or if it is very small, combine + * with other allocations. */ + offset = 0; + for (n = 0; n < nbox_this_time; n++) { + int height = box[n].y2 - box[n].y1; + int width = box[n].x2 - box[n].x1; + offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height; + } + + src_bo = kgem_create_buffer(kgem, offset, + KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0), + &ptr); + if (!src_bo) + break; - src_bo = kgem_create_buffer(kgem, offset, - KGEM_BUFFER_WRITE_INPLACE | (nbox ? KGEM_BUFFER_LAST : 0), - &ptr); - if (!src_bo) - break; + offset = 0; + do { + int height = box->y2 - box->y1; + int width = box->x2 - box->x1; + int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3); + uint32_t *b; - offset = 0; + DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n", + __FUNCTION__, + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + width, height, + offset, pitch)); + + assert(box->x1 + src_dx >= 0); + assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride); + assert(box->y1 + src_dy >= 0); + + assert(box->x1 + dst_dx >= 0); + assert(box->y1 + dst_dy >= 0); + + memcpy_xor(src, (char *)ptr + offset, + dst->drawable.bitsPerPixel, + stride, pitch, + box->x1 + src_dx, box->y1 + src_dy, + 0, 0, + width, height, + and, or); + + assert(kgem->mode == KGEM_BLT); + b = kgem->batch + kgem->nbatch; + b[0] = cmd; + b[1] = br13; + b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); + b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); + *(uint64_t *)(b+4) = + kgem_add_reloc64(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[6] = 0; + b[7] = pitch; + *(uint64_t *)(b+8) = + kgem_add_reloc64(kgem, kgem->nbatch + 8, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + offset); + kgem->nbatch += 10; + + box++; + offset += pitch * height; + } while (--nbox_this_time); + assert(offset == __kgem_buffer_size(src_bo)); + + if (nbox) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } + + kgem_bo_destroy(kgem, src_bo); + } while (nbox); + } else { + cmd |= 6; do { - int height = box->y2 - box->y1; - int width = box->x2 - box->x1; - int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3); - uint32_t *b; - - DBG((" %s: box src=(%d, %d), 
dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n", - __FUNCTION__, - box->x1 + src_dx, box->y1 + src_dy, - box->x1 + dst_dx, box->y1 + dst_dy, - width, height, - offset, pitch)); - - assert(box->x1 + src_dx >= 0); - assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride); - assert(box->y1 + src_dy >= 0); - - assert(box->x1 + dst_dx >= 0); - assert(box->y1 + dst_dy >= 0); - - memcpy_xor(src, (char *)ptr + offset, - dst->drawable.bitsPerPixel, - stride, pitch, - box->x1 + src_dx, box->y1 + src_dy, - 0, 0, - width, height, - and, or); - - b = kgem->batch + kgem->nbatch; - b[0] = cmd; - b[1] = br13; - b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); - b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); - b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, - I915_GEM_DOMAIN_RENDER << 16 | - I915_GEM_DOMAIN_RENDER | - KGEM_RELOC_FENCED, - 0); - b[5] = 0; - b[6] = pitch; - b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, - I915_GEM_DOMAIN_RENDER << 16 | - KGEM_RELOC_FENCED, - offset); - kgem->nbatch += 8; + int nbox_this_time; + + nbox_this_time = nbox; + if (8*nbox_this_time > kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) + nbox_this_time = (kgem->surface - kgem->nbatch - KGEM_BATCH_RESERVED) / 8; + if (2*nbox_this_time > KGEM_RELOC_SIZE(kgem) - kgem->nreloc) + nbox_this_time = (KGEM_RELOC_SIZE(kgem) - kgem->nreloc) / 2; + assert(nbox_this_time); + nbox -= nbox_this_time; + + /* Count the total number of bytes to be read and allocate a + * single buffer large enough. Or if it is very small, combine + * with other allocations. */ + offset = 0; + for (n = 0; n < nbox_this_time; n++) { + int height = box[n].y2 - box[n].y1; + int width = box[n].x2 - box[n].x1; + offset += PITCH(width, dst->drawable.bitsPerPixel >> 3) * height; + } - box++; - offset += pitch * height; - } while (--nbox_this_time); - assert(offset == __kgem_buffer_size(src_bo)); + src_bo = kgem_create_buffer(kgem, offset, + KGEM_BUFFER_WRITE_INPLACE | (nbox ? 
KGEM_BUFFER_LAST : 0), + &ptr); + if (!src_bo) + break; - if (nbox) { - _kgem_submit(kgem); - _kgem_set_mode(kgem, KGEM_BLT); - } + offset = 0; + do { + int height = box->y2 - box->y1; + int width = box->x2 - box->x1; + int pitch = PITCH(width, dst->drawable.bitsPerPixel >> 3); + uint32_t *b; + + DBG((" %s: box src=(%d, %d), dst=(%d, %d) size=(%d, %d), dst offset=%d, dst pitch=%d\n", + __FUNCTION__, + box->x1 + src_dx, box->y1 + src_dy, + box->x1 + dst_dx, box->y1 + dst_dy, + width, height, + offset, pitch)); + + assert(box->x1 + src_dx >= 0); + assert((box->x2 + src_dx)*dst->drawable.bitsPerPixel <= 8*stride); + assert(box->y1 + src_dy >= 0); + + assert(box->x1 + dst_dx >= 0); + assert(box->y1 + dst_dy >= 0); + + memcpy_xor(src, (char *)ptr + offset, + dst->drawable.bitsPerPixel, + stride, pitch, + box->x1 + src_dx, box->y1 + src_dy, + 0, 0, + width, height, + and, or); - kgem_bo_destroy(kgem, src_bo); - } while (nbox); + assert(kgem->mode == KGEM_BLT); + b = kgem->batch + kgem->nbatch; + b[0] = cmd; + b[1] = br13; + b[2] = (box->y1 + dst_dy) << 16 | (box->x1 + dst_dx); + b[3] = (box->y2 + dst_dy) << 16 | (box->x2 + dst_dx); + b[4] = kgem_add_reloc(kgem, kgem->nbatch + 4, dst_bo, + I915_GEM_DOMAIN_RENDER << 16 | + I915_GEM_DOMAIN_RENDER | + KGEM_RELOC_FENCED, + 0); + b[5] = 0; + b[6] = pitch; + b[7] = kgem_add_reloc(kgem, kgem->nbatch + 7, src_bo, + I915_GEM_DOMAIN_RENDER << 16 | + KGEM_RELOC_FENCED, + offset); + kgem->nbatch += 8; + + box++; + offset += pitch * height; + } while (--nbox_this_time); + assert(offset == __kgem_buffer_size(src_bo)); + + if (nbox) { + _kgem_submit(kgem); + _kgem_set_mode(kgem, KGEM_BLT); + } + + kgem_bo_destroy(kgem, src_bo); + } while (nbox); + } sna->blt_state.fill_bo = 0; return true; diff --git a/src/sna/sna_reg.h b/src/sna/sna_reg.h index 26282361..bda6ef67 100644 --- a/src/sna/sna_reg.h +++ b/src/sna/sna_reg.h @@ -42,22 +42,22 @@ #define BLT_SRC_TILED (1<<15) #define BLT_DST_TILED (1<<11) -#define COLOR_BLT_CMD ((2<<29)|(0x40<<22)|(0x3)) -#define XY_COLOR_BLT ((2<<29)|(0x50<<22)|(0x4)) -#define XY_SETUP_BLT ((2<<29)|(1<<22)|6) -#define XY_SETUP_MONO_PATTERN_SL_BLT ((2<<29)|(0x11<<22)|7) -#define XY_SETUP_CLIP ((2<<29)|(3<<22)|1) -#define XY_SCANLINE_BLT ((2<<29)|(0x25<<22)|1) -#define XY_TEXT_IMMEDIATE_BLT ((2<<29)|(0x31<<22)|(1<<16)) -#define XY_SRC_COPY_BLT_CMD ((2<<29)|(0x53<<22)|6) -#define SRC_COPY_BLT_CMD ((2<<29)|(0x43<<22)|0x4) -#define XY_PAT_BLT ((2<<29)|(0x51<<22)|0x4) -#define XY_PAT_BLT_IMMEDIATE ((2<<29)|(0x72<<22)) -#define XY_MONO_PAT ((0x2<<29)|(0x52<<22)|0x7) -#define XY_MONO_SRC_COPY ((0x2<<29)|(0x54<<22)|(0x6)) -#define XY_MONO_SRC_COPY_IMM ((0x2<<29)|(0x71<<22)) -#define XY_FULL_MONO_PATTERN_BLT ((0x2<<29)|(0x57<<22)|0xa) -#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT ((0x2<<29)|(0x58<<22)|0xa) +#define COLOR_BLT_CMD (2<<29|0x40<<22|(0x3)) +#define XY_COLOR_BLT (2<<29|0x50<<22|(0x4)) +#define XY_SETUP_BLT (2<<29|0x01<<22) +#define XY_SETUP_MONO_PATTERN_SL_BLT (2<<29|0x11<<22) +#define XY_SETUP_CLIP (2<<29|0x03<<22|1) +#define XY_SCANLINE_BLT (2<<29|0x25<<22|1) +#define XY_TEXT_IMMEDIATE_BLT (2<<29|0x31<<22|(1<<16)) +#define XY_SRC_COPY_BLT_CMD (2<<29|0x53<<22) +#define SRC_COPY_BLT_CMD (2<<29|0x43<<22|0x4) +#define XY_PAT_BLT (2<<29|0x51<<22) +#define XY_PAT_BLT_IMMEDIATE (2<<29|0x72<<22) +#define XY_MONO_PAT (2<<29|0x52<<22) +#define XY_MONO_SRC_COPY (2<<29|0x54<<22) +#define XY_MONO_SRC_COPY_IMM (2<<29|0x71<<22) +#define XY_FULL_MONO_PATTERN_BLT (2<<29|0x57<<22) +#define XY_FULL_MONO_PATTERN_MONO_SRC_BLT (2<<29|0x58<<22) /* FLUSH 
commands */ #define BRW_3D(Pipeline,Opcode,Subopcode) \ diff --git a/src/uxa/intel_driver.c b/src/uxa/intel_driver.c index 690ac778..431c34ba 100644 --- a/src/uxa/intel_driver.c +++ b/src/uxa/intel_driver.c @@ -410,6 +410,9 @@ static Bool can_accelerate_blt(struct intel_screen_private *intel) if (INTEL_INFO(intel)->gen == -1) return FALSE; + if (INTEL_INFO(intel)->gen >= 0100) + return FALSE; + if (xf86ReturnOptValBool(intel->Options, OPTION_ACCEL_DISABLE, FALSE) || !intel_option_cast_string_to_bool(intel, OPTION_ACCEL_METHOD, TRUE)) { xf86DrvMsg(intel->scrn->scrnIndex, X_CONFIG, |
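Every hunk above follows the same shape: on kgem->gen >= 0100 each graphics address carried by an XY_* blitter command widens from one dword to a qword, the dword count in the command header grows to match, and the address is written through the new kgem_add_reloc64() into two consecutive dwords, while older generations keep the existing kgem_add_reloc() path. The standalone sketch below shows that branch for the simplest case, XY_COLOR_BLT (6 dwords before gen8, 7 dwords at gen >= 0100). It is an illustration only: the fake_kgem struct and the two reloc stubs are invented stand-ins, and just the dword layout and the gen >= 0100 test mirror what this patch emits.

/* Minimal sketch of the 32-bit vs 64-bit relocation split for XY_COLOR_BLT.
 * Not driver code: fake_kgem, fake_reloc32() and fake_reloc64() are stand-ins
 * for kgem, kgem_add_reloc() and kgem_add_reloc64(). */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

#define XY_COLOR_BLT (2u << 29 | 0x50 << 22 | 0x4)

struct fake_kgem {
	unsigned gen;          /* as in kgem: 0100 (octal) and newer take the 64-bit path */
	uint32_t batch[32];
	int nbatch;
};

/* Stand-in for kgem_add_reloc(): returns a pretend 32-bit graphics address. */
static uint32_t fake_reloc32(struct fake_kgem *kgem, int pos, uint32_t delta)
{
	(void)kgem; (void)pos;
	return 0x00100000u + delta;
}

/* Stand-in for kgem_add_reloc64(): gen8+ addresses no longer fit in one dword. */
static uint64_t fake_reloc64(struct fake_kgem *kgem, int pos, uint64_t delta)
{
	(void)kgem; (void)pos;
	return 0x100000000ull + delta;
}

/* Emit one solid fill, branching the way the patch does: pre-gen8 the command
 * is 6 dwords with a 32-bit destination address; at gen >= 0100 the address
 * becomes a qword and the length field in the header grows by one. */
static void emit_xy_color_blt(struct fake_kgem *kgem, uint32_t br13,
			      uint16_t x1, uint16_t y1,
			      uint16_t x2, uint16_t y2, uint32_t color)
{
	uint32_t *b = kgem->batch + kgem->nbatch;

	b[1] = br13;
	b[2] = (uint32_t)y1 << 16 | x1;
	b[3] = (uint32_t)y2 << 16 | x2;
	if (kgem->gen >= 0100) {
		uint64_t addr = fake_reloc64(kgem, kgem->nbatch + 4, 0);

		b[0] = XY_COLOR_BLT | 5;            /* 7 dwords total */
		memcpy(b + 4, &addr, sizeof(addr)); /* destination address, 2 dwords */
		b[6] = color;
		kgem->nbatch += 7;
	} else {
		b[0] = XY_COLOR_BLT | 4;            /* 6 dwords total */
		b[4] = fake_reloc32(kgem, kgem->nbatch + 4, 0);
		b[5] = color;
		kgem->nbatch += 6;
	}
}

int main(void)
{
	struct fake_kgem kgem = { .gen = 0100 };

	emit_xy_color_blt(&kgem, 0xcc << 16 | 4096, 0, 0, 64, 64, 0xffffffffu);
	printf("emitted %d dwords\n", kgem.nbatch);
	return 0;
}

In the driver itself the qword is stored directly with *(uint64_t *)(b+4) = kgem_add_reloc64(...); the memcpy here only keeps the self-contained example free of alignment and aliasing caveats.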