summary | refs | log | tree | commit | diff
path: root/src
diff options
context:
space:
mode:
author Michel Dänzer <daenzer@vmware.com> 2009-10-03 16:33:32 +0200
committer Michel Dänzer <daenzer@vmware.com> 2009-10-03 16:33:32 +0200
commit 4b4ce36081ca151c24e028c54b59986f41731a73 (patch)
tree 06b15fc1c2acdaaf48a93254ba51b08a8d1ca63d /src
parent eade1e5be159c9f2965d611925596d33cab11d6d (diff)
R3/5xx EXA: Minimise number of draw primitives used for Composite operations.
This should reduce the kernel CS checker overhead, if nothing else. I'll leave porting this to other chipset families to others who can test it.
Diffstat (limited to 'src')
-rw-r--r-- src/radeon.h 3
-rw-r--r-- src/radeon_exa_render.c 46
2 files changed, 37 insertions, 12 deletions
diff --git a/src/radeon.h b/src/radeon.h
index db7b26b8..0322bf00 100644
--- a/src/radeon.h
+++ b/src/radeon.h
@@ -687,6 +687,9 @@ struct radeon_accel_state {
/* Size of tiles ... set to 65536x65536 if not tiling in that direction */
Bool src_tile_width;
Bool src_tile_height;
+ uint32_t *draw_header;
+ unsigned vtx_count;
+ unsigned num_vtx;
Bool vsync;
diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c
index 6053eef2..c266fb71 100644
--- a/src/radeon_exa_render.c
+++ b/src/radeon_exa_render.c
@@ -2041,6 +2041,16 @@ static void FUNC_NAME(RadeonDoneComposite)(PixmapPtr pDst)
ENTER_DRAW(0);
if (IS_R300_3D || IS_R500_3D) {
+ if (info->accel_state->draw_header) {
+ info->accel_state->draw_header[0] = CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+ info->accel_state->num_vtx *
+ info->accel_state->vtx_count);
+ info->accel_state->draw_header[1] = RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
+ RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+ (info->accel_state->num_vtx << RADEON_CP_VC_CNTL_NUM_SHIFT);
+ info->accel_state->draw_header = NULL;
+ }
+
BEGIN_ACCEL(3);
OUT_ACCEL_REG(R300_SC_CLIP_RULE, 0xAAAA);
OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_RB3D_DC_FLUSH_ALL);
@@ -2126,8 +2136,10 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
/* ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n",
srcX, srcY, maskX, maskY,dstX, dstY, w, h); */
-#if defined(ACCEL_CP) && defined(XF86DRM_MODE)
- if (info->cs && CS_FULL(info->cs)) {
+#if defined(ACCEL_CP)
+ if ((info->cs && CS_FULL(info->cs)) ||
+ (!info->cs && (info->cp->indirectBuffer->used + 4 * 32) >
+ info->cp->indirectBuffer->total)) {
FUNC_NAME(RadeonDoneComposite)(info->accel_state->dst_pix);
radeon_cs_flush_indirect(pScrn);
info->accel_state->exa->PrepareComposite(info->accel_state->composite_op,
@@ -2202,12 +2214,26 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE |
(3 << RADEON_CP_VC_CNTL_NUM_SHIFT));
} else if (IS_R300_3D || IS_R500_3D) {
- BEGIN_RING(4 * vtx_count + 4);
- OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
- 4 * vtx_count));
- OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
- RADEON_CP_VC_CNTL_PRIM_WALK_RING |
- (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+ if (!info->accel_state->draw_header) {
+ BEGIN_RING(2);
+
+ if (info->cs)
+ info->accel_state->draw_header = info->cs->packets + info->cs->cdw;
+ else
+ info->accel_state->draw_header = __head;
+ info->accel_state->num_vtx = 0;
+ info->accel_state->vtx_count = vtx_count;
+
+ OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
+ 4 * vtx_count));
+ OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_QUAD_LIST |
+ RADEON_CP_VC_CNTL_PRIM_WALK_RING |
+ (4 << RADEON_CP_VC_CNTL_NUM_SHIFT));
+ ADVANCE_RING();
+ }
+
+ info->accel_state->num_vtx += 4;
+ BEGIN_RING(4 * vtx_count);
} else {
BEGIN_RING(3 * vtx_count + 2);
OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2,
@@ -2267,10 +2293,6 @@ static void FUNC_NAME(RadeonCompositeTile)(ScrnInfoPtr pScrn,
xFixedToFloat(srcTopRight.x) / info->accel_state->texW[0], xFixedToFloat(srcTopRight.y) / info->accel_state->texH[0]);
}
- if (IS_R300_3D || IS_R500_3D)
- /* flushing is pipelined, free/finish is not */
- OUT_ACCEL_REG(R300_RB3D_DSTCACHE_CTLSTAT, R300_DC_FLUSH_3D);
-
#ifdef ACCEL_CP
ADVANCE_RING();
#else