From f6b2653c3b66d22efd61382105a852c656485f64 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 26 Mar 2010 09:59:51 +0000 Subject: uxa: solid rects The cost of performing relocations outweighs the advantages of using the blitter for solids with lots of rectangles. References: Bug 22127 - [UXA] 50% performance regression for XRenderFillRectangles https://bugs.freedesktop.org/show_bug.cgi?id=22127 By using the 3D pipeline we improve our performance by around 4x on i945, measured by the jxbench microbenchmark, and a factor of 10x by short-cutting to the 3D pipeline for blended rectangles. Before, on an i945GME: 19982.412060 Ops/s; rects (!); 15x15 9599.131693 Ops/s; rects (!); 75x75 3803.654743 Ops/s; rects (!); 250x250 6836.743772 Ops/s; rects blended; 15x15 1443.750000 Ops/s; rects blended; 75x75 495.335821 Ops/s; rects blended; 250x250 23247.933884 Ops/s; rects composition (!); 15x15 10993.073048 Ops/s; rects composition (!); 75x75 3595.905172 Ops/s; rects composition (!); 250x250 After: 87271.145975 Ops/s; rects (!); 15x15 32347.744361 Ops/s; rects (!); 75x75 5884.177215 Ops/s; rects (!); 250x250 73500.000000 Ops/s; rects blended; 15x15 33580.882353 Ops/s; rects blended; 75x75 5858.811749 Ops/s; rects blended; 250x250 25582.317073 Ops/s; rects composition (!); 15x15 6664.728682 Ops/s; rects composition (!); 75x75 14965.909091 Ops/s; rects composition (!); 250x250 [suspicious] This has no impact on Cairo, but I have a suspicion from watching xtrace that Qt likes to blit thousands of 1x1 rectangles with the same colour. However, we are still around 2-3x slower than the reported figures for EXA! Signed-off-by: Chris Wilson (cherry picked from commit cb887cfc670bf63993bd313ff33927afb8198eae) Signed-off-by: Owain G. 
Ainsworth --- uxa/uxa.c | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'uxa/uxa.c') diff --git a/uxa/uxa.c b/uxa/uxa.c index d6ad5a63..9ea14662 100644 --- a/uxa/uxa.c +++ b/uxa/uxa.c @@ -388,6 +388,7 @@ static Bool uxa_close_screen(int i, ScreenPtr pScreen) #ifdef RENDER if (ps) { ps->Composite = uxa_screen->SavedComposite; + ps->CompositeRects = uxa_screen->SavedCompositeRects; ps->Glyphs = uxa_screen->SavedGlyphs; ps->Trapezoids = uxa_screen->SavedTrapezoids; ps->AddTraps = uxa_screen->SavedAddTraps; @@ -517,6 +518,9 @@ Bool uxa_driver_init(ScreenPtr screen, uxa_driver_t * uxa_driver) uxa_screen->SavedComposite = ps->Composite; ps->Composite = uxa_composite; + uxa_screen->SavedCompositeRects = ps->CompositeRects; + ps->CompositeRects = uxa_solid_rects; + uxa_screen->SavedGlyphs = ps->Glyphs; ps->Glyphs = uxa_glyphs; -- cgit v1.2.3 From 4e3c06f691a1073d811fd01cb965b3f2c63ca996 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 May 2010 11:38:05 +0100 Subject: uxa: Use a glyph private rather than a hash table. Store the cache position directly on the glyph using a devPrivate rather than through an auxiliary hash table. x11perf on PineView: 650/638 kglyphs/s -> 701/686 kglyphs/s [aa/rgb] Signed-off-by: Chris Wilson (cherry picked from commit 11581dda99cb2e4ae78fc73be4b02185b3be58ed) Signed-off-by: Owain G. 
Ainsworth --- uxa/uxa.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'uxa/uxa.c') diff --git a/uxa/uxa.c b/uxa/uxa.c index 9ea14662..ce03fa07 100644 --- a/uxa/uxa.c +++ b/uxa/uxa.c @@ -393,6 +393,8 @@ static Bool uxa_close_screen(int i, ScreenPtr pScreen) ps->Trapezoids = uxa_screen->SavedTrapezoids; ps->AddTraps = uxa_screen->SavedAddTraps; ps->Triangles = uxa_screen->SavedTriangles; + + ps->UnrealizeGlyph = uxa_screen->SavedUnrealizeGlyph; } #endif @@ -524,6 +526,9 @@ Bool uxa_driver_init(ScreenPtr screen, uxa_driver_t * uxa_driver) uxa_screen->SavedGlyphs = ps->Glyphs; ps->Glyphs = uxa_glyphs; + uxa_screen->SavedUnrealizeGlyph = ps->UnrealizeGlyph; + ps->UnrealizeGlyph = uxa_glyph_unrealize; + uxa_screen->SavedTriangles = ps->Triangles; ps->Triangles = uxa_triangles; -- cgit v1.2.3 From f6cd08d57843d5e604d9cc3f687b01a9e0b2d1cf Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 29 May 2010 17:35:00 +0100 Subject: uxa: Mega-Glyphs! Rewrite glyph rendering to avoid the intermediate buffer, accumulating the glyph rectangles directly in the backend composite routines. And modify the glyph cache routines to fully utilise the allocated size of the tiled buffer on older hardware. To do this we alias all glyph sizes into the same texture using a technique suggested by Keith Packard. PineView: 885/856-> 1150/1110 kglyph/s (aa/rgb) Signed-off-by: Chris Wilson (cherry picked from commit 5fff430046db2030f89e49beb66c6476dc3fcd4c) Signed-off-by: Owain G. 
Ainsworth --- uxa/uxa.c | 43 ++++++++++++++++++++----------------------- 1 file changed, 20 insertions(+), 23 deletions(-) (limited to 'uxa/uxa.c') diff --git a/uxa/uxa.c b/uxa/uxa.c index ce03fa07..330b3893 100644 --- a/uxa/uxa.c +++ b/uxa/uxa.c @@ -432,9 +432,6 @@ Bool uxa_driver_init(ScreenPtr screen, uxa_driver_t * uxa_driver) { uxa_screen_t *uxa_screen; ScrnInfoPtr scrn = xf86Screens[screen->myNum]; -#ifdef RENDER - PictureScreenPtr ps; -#endif if (!uxa_driver) return FALSE; @@ -463,10 +460,6 @@ Bool uxa_driver_init(ScreenPtr screen, uxa_driver_t * uxa_driver) "non-NULL\n", screen->myNum); return FALSE; } -#ifdef RENDER - ps = GetPictureScreenIfSet(screen); -#endif - uxa_screen = xcalloc(sizeof(uxa_screen_t), 1); if (!uxa_screen) { @@ -516,27 +509,30 @@ Bool uxa_driver_init(ScreenPtr screen, uxa_driver_t * uxa_driver) scrn->EnableDisableFBAccess = uxa_xorg_enable_disable_fb_access; #ifdef RENDER - if (ps) { - uxa_screen->SavedComposite = ps->Composite; - ps->Composite = uxa_composite; + { + PictureScreenPtr ps = GetPictureScreenIfSet(screen); + if (ps) { + uxa_screen->SavedComposite = ps->Composite; + ps->Composite = uxa_composite; - uxa_screen->SavedCompositeRects = ps->CompositeRects; - ps->CompositeRects = uxa_solid_rects; + uxa_screen->SavedCompositeRects = ps->CompositeRects; + ps->CompositeRects = uxa_solid_rects; - uxa_screen->SavedGlyphs = ps->Glyphs; - ps->Glyphs = uxa_glyphs; + uxa_screen->SavedGlyphs = ps->Glyphs; + ps->Glyphs = uxa_glyphs; - uxa_screen->SavedUnrealizeGlyph = ps->UnrealizeGlyph; - ps->UnrealizeGlyph = uxa_glyph_unrealize; + uxa_screen->SavedUnrealizeGlyph = ps->UnrealizeGlyph; + ps->UnrealizeGlyph = uxa_glyph_unrealize; - uxa_screen->SavedTriangles = ps->Triangles; - ps->Triangles = uxa_triangles; + uxa_screen->SavedTriangles = ps->Triangles; + ps->Triangles = uxa_triangles; - uxa_screen->SavedTrapezoids = ps->Trapezoids; - ps->Trapezoids = uxa_trapezoids; + uxa_screen->SavedTrapezoids = ps->Trapezoids; + ps->Trapezoids = 
uxa_trapezoids; - uxa_screen->SavedAddTraps = ps->AddTraps; - ps->AddTraps = uxa_check_add_traps; + uxa_screen->SavedAddTraps = ps->AddTraps; + ps->AddTraps = uxa_check_add_traps; + } } #endif @@ -548,7 +544,8 @@ Bool uxa_driver_init(ScreenPtr screen, uxa_driver_t * uxa_driver) ShmRegisterFuncs(screen, &uxa_shm_funcs); #endif - uxa_glyphs_init(screen); + if (!uxa_glyphs_init(screen)) + return FALSE; LogMessage(X_INFO, "UXA(%d): Driver registered support for the following" -- cgit v1.2.3 From ae9f6675da8663d3b2706c7975c6160fea23f676 Mon Sep 17 00:00:00 2001 From: Eric Anholt Date: Fri, 4 Jun 2010 16:04:37 -0700 Subject: Use libc instead of deprecated libc wrappers for malloc/calloc/free. Signed-off-by: Owain G. Ainsworth (cherrypick of part of this commit came from 2c1fda08e889cad07acb452230da06f9c383d21c by eric anholt) --- uxa/uxa.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'uxa/uxa.c') diff --git a/uxa/uxa.c b/uxa/uxa.c index 330b3893..e6f96705 100644 --- a/uxa/uxa.c +++ b/uxa/uxa.c @@ -398,7 +398,7 @@ static Bool uxa_close_screen(int i, ScreenPtr pScreen) } #endif - xfree(uxa_screen); + free(uxa_screen); return (*pScreen->CloseScreen) (i, pScreen); } @@ -409,13 +409,13 @@ static Bool uxa_close_screen(int i, ScreenPtr pScreen) * without breaking ABI between UXA and the drivers. The driver's * responsibility is to check beforehand that the UXA module has a matching * major number and sufficient minor. Drivers are responsible for freeing the - * driver structure using xfree(). + * driver structure using free(). 
* * @return a newly allocated, zero-filled driver structure */ uxa_driver_t *uxa_driver_alloc(void) { - return xcalloc(1, sizeof(uxa_driver_t)); + return calloc(1, sizeof(uxa_driver_t)); } /** @@ -460,7 +460,7 @@ Bool uxa_driver_init(ScreenPtr screen, uxa_driver_t * uxa_driver) "non-NULL\n", screen->myNum); return FALSE; } - uxa_screen = xcalloc(sizeof(uxa_screen_t), 1); + uxa_screen = calloc(sizeof(uxa_screen_t), 1); if (!uxa_screen) { LogMessage(X_WARNING, -- cgit v1.2.3 From 34ce21b8537a1873b70437b273f3d6b46ed5b443 Mon Sep 17 00:00:00 2001 From: Keith Packard Date: Fri, 30 Apr 2010 13:33:54 -0700 Subject: Adapt to DevPrivate API changes This allows the driver to be built against either the old or new DevPrivate API. Signed-off-by: Keith Packard (cherry picked from commit 42ddc39430a10513c49a415ddf1a0dc5fa52089e) Signed-off-by: Owain G. Ainsworth --- uxa/uxa.c | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'uxa/uxa.c') diff --git a/uxa/uxa.c b/uxa/uxa.c index e6f96705..19c6a2d1 100644 --- a/uxa/uxa.c +++ b/uxa/uxa.c @@ -39,7 +39,11 @@ #include "dixfontstr.h" #include "uxa.h" +#if HAS_DEVPRIVATEKEYREC +DevPrivateKeyRec uxa_screen_index; +#else int uxa_screen_index; +#endif /** * uxa_get_drawable_pixmap() returns a backing pixmap for a given drawable. @@ -460,6 +464,10 @@ Bool uxa_driver_init(ScreenPtr screen, uxa_driver_t * uxa_driver) "non-NULL\n", screen->myNum); return FALSE; } +#if HAS_DIXREGISTERPRIVATEKEY + if (!dixRegisterPrivateKey(&uxa_screen_index, PRIVATE_SCREEN, 0)) + return FALSE; +#endif uxa_screen = calloc(sizeof(uxa_screen_t), 1); if (!uxa_screen) { -- cgit v1.2.3 From fcdd53c6c130bf29cfd92309672c1fed3ec048f6 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Mon, 7 Jun 2010 18:04:51 +0100 Subject: uxa: Setup acceleration functions prior to the damage layer MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We need to install the acceleration functions so that they are wrapped by the Damage layer. 
This fixes the corruption under a compositing WM introduced in commit 8700673157fdd3a87ad5150f2f30823261fec519. Signed-off-by: Chris Wilson Reported-and-tested-by: Arkadiusz Miśkiewicz (cherry picked from commit e6acbc763229fd5b5b2cc1d65136404d02ac4655) Signed-off-by: Owain G. Ainsworth --- uxa/uxa.c | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'uxa/uxa.c') diff --git a/uxa/uxa.c b/uxa/uxa.c index 19c6a2d1..a48c3e13 100644 --- a/uxa/uxa.c +++ b/uxa/uxa.c @@ -552,9 +552,6 @@ Bool uxa_driver_init(ScreenPtr screen, uxa_driver_t * uxa_driver) ShmRegisterFuncs(screen, &uxa_shm_funcs); #endif - if (!uxa_glyphs_init(screen)) - return FALSE; - LogMessage(X_INFO, "UXA(%d): Driver registered support for the following" " operations:\n", screen->myNum); @@ -575,6 +572,14 @@ Bool uxa_driver_init(ScreenPtr screen, uxa_driver_t * uxa_driver) return TRUE; } +Bool uxa_resources_init(ScreenPtr screen) +{ + if (!uxa_glyphs_init(screen)) + return FALSE; + + return TRUE; +} + /** * uxa_driver_fini tears down UXA on a given screen. * -- cgit v1.2.3 From d00c22a0b42b9acbbd43ea2fa37b97c1ff5034e2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Sat, 19 Jun 2010 12:30:23 +0100 Subject: uxa: Enable SHM pixmaps Now with streaming uploads and downloads for composite operations in place, shared memory pixmaps are no longer that dire performance wise. With careful use these can in fact be the most efficient means of transfer between a wholly software renderer in the client and a backing store. For instance, Chromium renders internally to an ARGB32 image buffer and uses a shared pixmap to composite dirty regions into the backing store. Thereby using the GPU to either perform the blit or the format conversion. Enabling shared pixmaps reduces our CPU overhead whilst scrolling by a factor of 5 or so. And this is achieved simply by deleting obsolete code! 
Signed-off-by: Chris Wilson (cherry picked from commit 4b7142baa0b3bf6f38843d06aadc579d8624cefc) Signed-off-by: Owain G. Ainsworth --- uxa/uxa.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'uxa/uxa.c') diff --git a/uxa/uxa.c b/uxa/uxa.c index a48c3e13..37546dc3 100644 --- a/uxa/uxa.c +++ b/uxa/uxa.c @@ -544,14 +544,6 @@ Bool uxa_driver_init(ScreenPtr screen, uxa_driver_t * uxa_driver) } #endif -#ifdef MITSHM - /* Re-register with the MI funcs, which don't allow shared pixmaps. - * Shared pixmaps are almost always a performance loss for us, but this - * still allows for SHM PutImage. - */ - ShmRegisterFuncs(screen, &uxa_shm_funcs); -#endif - LogMessage(X_INFO, "UXA(%d): Driver registered support for the following" " operations:\n", screen->myNum); -- cgit v1.2.3