diff options
author | Chris Wilson <chris@chris-wilson.co.uk> | 2011-07-01 14:55:44 +0100 |
---|---|---|
committer | Chris Wilson <chris@chris-wilson.co.uk> | 2011-07-01 21:41:23 +0100 |
commit | 120c98ac10435c8e848a8337c1f544f81a05cd3a (patch) | |
tree | 2c69846fad530f2d45d29ebfcae2783955b3872a /src | |
parent | f6c8c3bb6fd75bca6c7704b7d5869a5d44ce3832 (diff) |
sna: Downsample sources up to 2x too large to fit in the 3D pipeline
This is quite trivial to hit given the 2k limits on gen2/gen3. We
compromise on image quality by pre-downscaling the source by a fixed
factor to make it fit into the pipeline in preference to performing the
entire operation on the CPU.
Signed-off-by: Chris Wilson <chris@chris-wilson.co.uk>
Diffstat (limited to 'src')
-rw-r--r-- | src/sna/Makefile.am | 1 | ||||
-rw-r--r-- | src/sna/kgem.c | 58 | ||||
-rw-r--r-- | src/sna/kgem.h | 6 | ||||
-rw-r--r-- | src/sna/kgem_debug.c | 4 | ||||
-rw-r--r-- | src/sna/kgem_debug.h | 3 | ||||
-rw-r--r-- | src/sna/kgem_debug_gen3.c | 5 | ||||
-rw-r--r-- | src/sna/sna_render.c | 211 |
7 files changed, 283 insertions, 5 deletions
diff --git a/src/sna/Makefile.am b/src/sna/Makefile.am index dfd8a57f..d76480da 100644 --- a/src/sna/Makefile.am +++ b/src/sna/Makefile.am @@ -90,6 +90,7 @@ if DEBUG libsna_la_SOURCES += \ kgem_debug.c \ kgem_debug.h \ + kgem_debug_gen2.c \ kgem_debug_gen3.c \ kgem_debug_gen4.c \ kgem_debug_gen5.c \ diff --git a/src/sna/kgem.c b/src/sna/kgem.c index 88b1d837..6fe6e936 100644 --- a/src/sna/kgem.c +++ b/src/sna/kgem.c @@ -1819,6 +1819,64 @@ struct kgem_bo *kgem_upload_source_image(struct kgem *kgem, return bo; } +struct kgem_bo *kgem_upload_source_image_halved(struct kgem *kgem, + pixman_format_code_t format, + const void *data, + int x, int y, + int width, int height, + int stride, int bpp) +{ + int dst_stride = ALIGN(width * bpp / 2, 32) >> 3; + int size = dst_stride * height / 2; + struct kgem_bo *bo; + pixman_image_t *src_image, *dst_image; + pixman_transform_t t; + void *dst; + + DBG(("%s : (%d, %d), (%d, %d), stride=%d, bpp=%d\n", + __FUNCTION__, x, y, width, height, stride, bpp)); + + bo = kgem_create_buffer(kgem, size, KGEM_BUFFER_WRITE, &dst); + if (bo == NULL) + return NULL; + + dst_image = pixman_image_create_bits(format, width/2, height/2, + dst, dst_stride); + if (dst_image == NULL) + goto cleanup_bo; + + src_image = pixman_image_create_bits(format, width, height, + (uint32_t*)data, stride); + if (src_image == NULL) + goto cleanup_dst; + + memset(&t, 0, sizeof(t)); + t.matrix[0][0] = 2 << 16; + t.matrix[1][1] = 2 << 16; + t.matrix[2][2] = 1 << 16; + pixman_image_set_transform(src_image, &t); + pixman_image_set_filter(src_image, PIXMAN_FILTER_BILINEAR, NULL, 0); + + pixman_image_composite(PIXMAN_OP_SRC, + src_image, NULL, dst_image, + x, y, + 0, 0, + 0, 0, + width/2, height/2); + + pixman_image_unref(src_image); + pixman_image_unref(dst_image); + + bo->pitch = dst_stride; + return bo; + +cleanup_dst: + pixman_image_unref(dst_image); +cleanup_bo: + kgem_bo_destroy(kgem, bo); + return NULL; +} + void kgem_buffer_sync(struct kgem *kgem, struct kgem_bo *_bo) 
{ struct kgem_partial_bo *bo; diff --git a/src/sna/kgem.h b/src/sna/kgem.h index fac30afa..013809c8 100644 --- a/src/sna/kgem.h +++ b/src/sna/kgem.h @@ -141,6 +141,12 @@ struct kgem_bo *kgem_upload_source_image(struct kgem *kgem, int x, int y, int width, int height, int stride, int bpp); +struct kgem_bo *kgem_upload_source_image_halved(struct kgem *kgem, + pixman_format_code_t format, + const void *data, + int x, int y, + int width, int height, + int stride, int bpp); int kgem_choose_tiling(struct kgem *kgem, int tiling, int width, int height, int bpp); diff --git a/src/sna/kgem_debug.c b/src/sna/kgem_debug.c index 0dcd7065..20fe8a2e 100644 --- a/src/sna/kgem_debug.c +++ b/src/sna/kgem_debug.c @@ -352,6 +352,8 @@ static int (*decode_3d(int gen))(struct kgem*, uint32_t) return kgem_gen4_decode_3d; } else if (gen >= 30) { return kgem_gen3_decode_3d; + } else if (gen >= 20) { + return kgem_gen2_decode_3d; } assert(0); } @@ -366,6 +368,8 @@ static void (*finish_state(int gen))(struct kgem*) return kgem_gen4_finish_state; } else if (gen >= 30) { return kgem_gen3_finish_state; + } else if (gen >= 20) { + return kgem_gen2_finish_state; } assert(0); } diff --git a/src/sna/kgem_debug.h b/src/sna/kgem_debug.h index f9a931df..9211dcb3 100644 --- a/src/sna/kgem_debug.h +++ b/src/sna/kgem_debug.h @@ -25,4 +25,7 @@ void kgem_gen4_finish_state(struct kgem *kgem); int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset); void kgem_gen3_finish_state(struct kgem *kgem); +int kgem_gen2_decode_3d(struct kgem *kgem, uint32_t offset); +void kgem_gen2_finish_state(struct kgem *kgem); + #endif diff --git a/src/sna/kgem_debug_gen3.c b/src/sna/kgem_debug_gen3.c index da1d9fc9..6709a8ec 100644 --- a/src/sna/kgem_debug_gen3.c +++ b/src/sna/kgem_debug_gen3.c @@ -1552,9 +1552,6 @@ out: int kgem_gen3_decode_3d(struct kgem *kgem, uint32_t offset) { - uint32_t opcode; - unsigned int idx; - struct { uint32_t opcode; int min_len; @@ -1572,6 +1569,8 @@ int kgem_gen3_decode_3d(struct kgem *kgem, 
uint32_t offset) { 0x07, 1, 1, "3DSTATE_RASTERIZATION_RULES" }, }; uint32_t *data = kgem->batch + offset; + uint32_t opcode; + unsigned int idx; opcode = (data[0] & 0x1f000000) >> 24; diff --git a/src/sna/sna_render.c b/src/sna/sna_render.c index 72a3c1e2..baf51c32 100644 --- a/src/sna/sna_render.c +++ b/src/sna/sna_render.c @@ -427,6 +427,212 @@ sna_render_pixmap_bo(struct sna *sna, return bo != NULL; } +static int sna_render_picture_downsample(struct sna *sna, + PicturePtr picture, + struct sna_composite_channel *channel, + int16_t x, int16_t y, + int16_t w, int16_t h, + int16_t dst_x, int16_t dst_y) +{ + struct kgem_bo *bo = NULL; + PixmapPtr pixmap = get_drawable_pixmap(picture->pDrawable); + int16_t ox, oy, ow, oh; + BoxRec box; + + assert(w && h); + + DBG(("%s (%d, %d)x(%d, %d) [dst=(%d, %d)]\n", + __FUNCTION__, x, y, w, h, dst_x, dst_y)); + + ow = w; + oh = h; + + ox = box.x1 = x; + oy = box.y1 = y; + box.x2 = x + w; + box.y2 = y + h; + if (channel->transform) { + pixman_vector_t v; + + pixman_transform_bounds(channel->transform, &box); + + v.vector[0] = ox << 16; + v.vector[1] = oy << 16; + v.vector[2] = 1 << 16; + pixman_transform_point(channel->transform, &v); + ox = v.vector[0] / v.vector[2]; + oy = v.vector[1] / v.vector[2]; + } + + if (channel->repeat != RepeatNone) { + if (box.x1 < 0 || + box.y1 < 0 || + box.x2 > pixmap->drawable.width || + box.y2 > pixmap->drawable.height) { + /* XXX tiled repeats? 
*/ + box.x1 = box.y1 = 0; + box.x2 = pixmap->drawable.width; + box.y2 = pixmap->drawable.height; + + if (!channel->is_affine) { + DBG(("%s: fallback -- repeating project transform too large for texture\n", + __FUNCTION__)); + return sna_render_picture_fixup(sna, + picture, + channel, + x, y, ow, oh, + dst_x, dst_y); + } + } + } else { + if (box.x1 < 0) + box.x1 = 0; + if (box.y1 < 0) + box.y1 = 0; + if (box.x2 > pixmap->drawable.width) + box.x2 = pixmap->drawable.width; + if (box.y2 > pixmap->drawable.height) + box.y2 = pixmap->drawable.height; + } + + w = box.x2 - box.x1; + h = box.y2 - box.y1; + assert(w && h); + if (w > 2*sna->render.max_3d_size || h > 2*sna->render.max_3d_size) + goto fixup; + + if (texture_is_cpu(pixmap, &box) && !move_to_gpu(pixmap, &box)) { + bo = kgem_upload_source_image_halved(&sna->kgem, + picture->format, + pixmap->devPrivate.ptr, + box.x1, box.y1, w, h, + pixmap->devKind, + pixmap->drawable.bitsPerPixel); + if (!bo) { + DBG(("%s: failed to upload source image, using clear\n", + __FUNCTION__)); + return 0; + } + } else { + ScreenPtr screen = pixmap->drawable.pScreen; + PicturePtr tmp_src, tmp_dst; + PictFormatPtr format; + struct sna_pixmap *priv; + pixman_transform_t t; + PixmapPtr tmp; + int error, i, j, ww, hh; + + if (!sna_pixmap_force_to_gpu(pixmap)) + goto fixup; + + tmp = screen->CreatePixmap(screen, + w/2, h/2, pixmap->drawable.depth, + CREATE_PIXMAP_USAGE_SCRATCH); + if (!tmp) + goto fixup; + + priv = sna_pixmap(tmp); + if (!priv) { + screen->DestroyPixmap(tmp); + goto fixup; + } + + format = PictureMatchFormat(screen, + pixmap->drawable.depth, + picture->format); + + tmp_dst = CreatePicture(0, &tmp->drawable, format, 0, NULL, + serverClient, &error); + + tmp_src = CreatePicture(0, &pixmap->drawable, format, 0, NULL, + serverClient, &error); + tmp_src->filter = PictFilterBilinear; + memset(&t, 0, sizeof(t)); + t.matrix[0][0] = 2 << 16; + t.matrix[1][1] = 2 << 16; + t.matrix[2][2] = 1 << 16; + tmp_src->transform = &t; + + 
ValidatePicture(tmp_dst); + ValidatePicture(tmp_src); + + ww = w/4; hh = h/4; + + DBG(("%s downsampling using %dx%d GPU tiles\n", + __FUNCTION__, ww, hh)); + + for (i = 0; i < 2; i++) { + for (j = 0; j < 2; j++) { + struct sna_composite_op op; + BoxRec b; + + memset(&op, 0, sizeof(op)); + if (!sna->render.composite(sna, + PictOpSrc, + tmp_src, NULL, tmp_dst, + box.x1 + ww*j, box.y1 + hh*i, + 0, 0, + ww*j, hh*i, + ww, hh, + &op)) { + tmp_src->transform = NULL; + FreePicture(tmp_src, 0); + FreePicture(tmp_dst, 0); + screen->DestroyPixmap(tmp); + goto fixup; + } + + b.x1 = ww*j; + b.y1 = hh*i; + b.x2 = b.x1 + ww; + b.y2 = b.y1 + hh; + + op.boxes(sna, &op, &b, 1); + op.done(sna, &op); + } + } + + bo = kgem_bo_reference(priv->gpu_bo); + + tmp_src->transform = NULL; + FreePicture(tmp_src, 0); + FreePicture(tmp_dst, 0); + screen->DestroyPixmap(tmp); + } + + if (ox == x && oy == y) { + x = y = 0; + } else if (channel->transform) { + pixman_vector_t v; + pixman_transform_t m; + + v.vector[0] = (ox - box.x1) << 16; + v.vector[1] = (oy - box.y1) << 16; + v.vector[2] = 1 << 16; + pixman_transform_invert(&m, channel->transform); + pixman_transform_point(&m, &v); + x = v.vector[0] / v.vector[2]; + y = v.vector[1] / v.vector[2]; + } else { + x = ox - box.x1; + y = oy - box.y1; + } + + channel->offset[0] = x - dst_x; + channel->offset[1] = y - dst_y; + channel->scale[0] = 1./w; + channel->scale[1] = 1./h; + channel->width = w / 2; + channel->height = h / 2; + channel->bo = bo; + return 1; + +fixup: + return sna_render_picture_fixup(sna, picture, channel, + x, y, w, h, + dst_x, dst_y); +} + int sna_render_picture_extract(struct sna *sna, PicturePtr picture, @@ -517,8 +723,9 @@ sna_render_picture_extract(struct sna *sna, if (w > sna->render.max_3d_size || h > sna->render.max_3d_size) { DBG(("%s: fallback -- sample too large for texture (%d, %d)x(%d, %d)\n", __FUNCTION__, box.x1, box.y1, w, h)); - return sna_render_picture_fixup(sna, picture, channel, - x, y, ow, oh, dst_x, dst_y); + 
return sna_render_picture_downsample(sna, picture, channel, + x, y, ow, oh, + dst_x, dst_y); } if (texture_is_cpu(pixmap, &box) && !move_to_gpu(pixmap, &box)) { |