diff options
author | Dave Airlie <airlied@redhat.com> | 2009-11-25 10:33:17 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2009-11-25 10:35:37 +1000 |
commit | 3a460a14b9603159f10d89da27b559c36a184e27 (patch) | |
tree | cc53eaab10c9bf5f532bd77e9ac5650bb3f0422c /src | |
parent | 797a3f0c71c94477eec565ea2c95553c6f66d9fd (diff) |
r600: refactor code to help future acceleration speedups.
This changes the vertex buffer (vb) index from a vertex count to a byte
offset, records the offset at which each operation's vertices start, and
uses that start offset to set up the vertex resource for the operation.
This still flushes after each operation to make sure we have
no regressions in non-kms/kms cases.
Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/r600_exa.c | 99 | ||||
-rw-r--r-- | src/r600_textured_videofuncs.c | 18 | ||||
-rw-r--r-- | src/r6xx_accel.c | 4 | ||||
-rw-r--r-- | src/radeon.h | 3 | ||||
-rw-r--r-- | src/radeon_kms.c | 6 |
5 files changed, 83 insertions, 47 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c index 4c63378a..331711c8 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -359,6 +359,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) ErrorF("PM: 0x%08x\n", pm); #endif + accel_state->vb_start_op = accel_state->vb_offset; return TRUE; } @@ -371,12 +372,14 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) struct radeon_accel_state *accel_state = info->accel_state; float *vb; - if (((accel_state->vb_index + 3) * 8) > accel_state->vb_total) { + if ((accel_state->vb_offset + (3 * 8)) > accel_state->vb_total) { R600DoneSolid(pPix); + if (info->cs) + radeon_cs_flush_indirect(pScrn); r600_cp_start(pScrn); } - vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*8); + vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset); vb[0] = (float)x1; vb[1] = (float)y1; @@ -387,7 +390,7 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) vb[4] = (float)x2; vb[5] = (float)y2; - accel_state->vb_index += 3; + accel_state->vb_offset += (3*8); } @@ -403,13 +406,13 @@ R600DoneSolid(PixmapPtr pPix) CLEAR (draw_conf); CLEAR (vtx_res); - if (accel_state->vb_index == 0) { + if (accel_state->vb_offset == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); return; } - accel_state->vb_size = accel_state->vb_index * 8; + accel_state->vb_size = accel_state->vb_offset; /* flush vertex cache */ if ((info->ChipFamily == CHIP_FAMILY_RV610) || @@ -426,11 +429,12 @@ R600DoneSolid(PixmapPtr pPix) accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ + accel_state->vb_size -= accel_state->vb_start_op; vtx_res.id = SQ_VTX_RESOURCE_vs; vtx_res.vtx_size_dw = 8 / 4; vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; - vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op; vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); @@ -450,6 +454,8 @@ 
R600DoneSolid(PixmapPtr pPix) accel_state->dst_size, accel_state->dst_mc_addr, accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); + accel_state->vb_start_op = 0; + R600CPFlushIndirect(pScrn, accel_state->ib); } @@ -651,6 +657,8 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, SEL_CENTROID_bit)); EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); END_BATCH(); + + accel_state->vb_start_op = accel_state->vb_offset; } static void @@ -664,13 +672,13 @@ R600DoCopy(ScrnInfoPtr pScrn) CLEAR (draw_conf); CLEAR (vtx_res); - if (accel_state->vb_index == 0) { + if (accel_state->vb_offset == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); return; } - accel_state->vb_size = accel_state->vb_index * 16; + accel_state->vb_size = accel_state->vb_offset; /* flush vertex cache */ if ((info->ChipFamily == CHIP_FAMILY_RV610) || @@ -687,11 +695,13 @@ R600DoCopy(ScrnInfoPtr pScrn) accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ + accel_state->vb_size -= accel_state->vb_start_op; + vtx_res.id = SQ_VTX_RESOURCE_vs; vtx_res.vtx_size_dw = 16 / 4; vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; - vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op; vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); @@ -710,6 +720,7 @@ R600DoCopy(ScrnInfoPtr pScrn) accel_state->dst_size, accel_state->dst_mc_addr, accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); + accel_state->vb_start_op = 0; R600CPFlushIndirect(pScrn, accel_state->ib); } @@ -723,12 +734,14 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn, struct radeon_accel_state *accel_state = info->accel_state; float *vb; - if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { + if ((accel_state->vb_offset + (3 * 16)) > accel_state->vb_total) { R600DoCopy(pScrn); + if (info->cs) + radeon_cs_flush_indirect(pScrn); r600_cp_start(pScrn); } - vb = 
(pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); + vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset); vb[0] = (float)dstX; vb[1] = (float)dstY; @@ -745,7 +758,7 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn, vb[10] = (float)(srcX + w); vb[11] = (float)(srcY + h); - accel_state->vb_index += 3; + accel_state->vb_offset += (3 * 16); } static Bool @@ -1888,6 +1901,8 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); END_BATCH(); + accel_state->vb_start_op = accel_state->vb_offset; + return TRUE; } @@ -1906,12 +1921,14 @@ static void R600Composite(PixmapPtr pDst, srcX, srcY, maskX, maskY,dstX, dstY, w, h); */ if (accel_state->msk_pic) { - if (((accel_state->vb_index + 3) * 24) > accel_state->vb_total) { + if ((accel_state->vb_offset + (3 * 24)) > accel_state->vb_total) { R600DoneComposite(pDst); + if (info->cs) + radeon_cs_flush_indirect(pScrn); r600_cp_start(pScrn); } - vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*24); + vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset); vb[0] = (float)dstX; vb[1] = (float)dstY; @@ -1934,13 +1951,16 @@ static void R600Composite(PixmapPtr pDst, vb[16] = (float)(maskX + w); vb[17] = (float)(maskY + h); + accel_state->vb_offset += 3 * 24; } else { - if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { + if ((accel_state->vb_offset + (3 * 16)) > accel_state->vb_total) { R600DoneComposite(pDst); + if (info->cs) + radeon_cs_flush_indirect(pScrn); r600_cp_start(pScrn); } - vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); + vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset); vb[0] = (float)dstX; vb[1] = (float)dstY; @@ -1956,9 +1976,10 @@ static void R600Composite(PixmapPtr pDst, vb[9] = (float)(dstY + h); vb[10] = (float)(srcX + w); vb[11] = (float)(srcY + h); + accel_state->vb_offset += 3 * 16; + } - accel_state->vb_index += 3; } @@ -1973,30 +1994,13 @@ static void 
R600DoneComposite(PixmapPtr pDst) CLEAR (draw_conf); CLEAR (vtx_res); - if (accel_state->vb_index == 0) { + if (accel_state->vb_offset == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); return; } - /* Vertex buffer setup */ - if (accel_state->msk_pic) { - accel_state->vb_size = accel_state->vb_index * 24; - vtx_res.id = SQ_VTX_RESOURCE_vs; - vtx_res.vtx_size_dw = 24 / 4; - vtx_res.vtx_num_entries = accel_state->vb_size / 4; - vtx_res.mem_req_size = 1; - vtx_res.vb_addr = accel_state->vb_mc_addr; - vtx_res.bo = accel_state->vb_bo; - } else { - accel_state->vb_size = accel_state->vb_index * 16; - vtx_res.id = SQ_VTX_RESOURCE_vs; - vtx_res.vtx_size_dw = 16 / 4; - vtx_res.vtx_num_entries = accel_state->vb_size / 4; - vtx_res.mem_req_size = 1; - vtx_res.vb_addr = accel_state->vb_mc_addr; - vtx_res.bo = accel_state->vb_bo; - } + accel_state->vb_size = accel_state->vb_offset; /* flush vertex cache */ if ((info->ChipFamily == CHIP_FAMILY_RV610) || (info->ChipFamily == CHIP_FAMILY_RV620) || @@ -2011,6 +2015,24 @@ static void R600DoneComposite(PixmapPtr pDst) accel_state->vb_size, accel_state->vb_mc_addr, accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); + accel_state->vb_size -= accel_state->vb_start_op; + + /* Vertex buffer setup */ + if (accel_state->msk_pic) { + vtx_res.id = SQ_VTX_RESOURCE_vs; + vtx_res.vtx_size_dw = 24 / 4; + vtx_res.vtx_num_entries = accel_state->vb_size / 4; + vtx_res.mem_req_size = 1; + vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op; + vtx_res.bo = accel_state->vb_bo; + } else { + vtx_res.id = SQ_VTX_RESOURCE_vs; + vtx_res.vtx_size_dw = 16 / 4; + vtx_res.vtx_num_entries = accel_state->vb_size / 4; + vtx_res.mem_req_size = 1; + vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op; + vtx_res.bo = accel_state->vb_bo; + } set_vtx_resource(pScrn, accel_state->ib, &vtx_res); draw_conf.prim_type = DI_PT_RECTLIST; @@ -2027,8 +2049,8 @@ static void R600DoneComposite(PixmapPtr pDst) accel_state->dst_size, 
accel_state->dst_mc_addr, accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); + accel_state->vb_start_op = 0; R600CPFlushIndirect(pScrn, accel_state->ib); - } Bool @@ -2336,6 +2358,9 @@ R600DownloadFromScreenCS(PixmapPtr pSrc, int x, int y, int w, 3, 0xffffffff); R600AppendCopyVertex(pScrn, x, y, 0, 0, w, h); R600DoCopy(pScrn); + + if (info->cs) + radeon_cs_flush_indirect(pScrn); r = radeon_bo_map(scratch, 0); if (r) { diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index 7598429f..7d0cfa78 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -65,13 +65,13 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) CLEAR (draw_conf); CLEAR (vtx_res); - if (accel_state->vb_index == 0) { + if (accel_state->vb_offset == 0) { R600IBDiscard(pScrn, accel_state->ib); r600_vb_discard(pScrn); return; } - accel_state->vb_size = accel_state->vb_index * 16; + accel_state->vb_size = accel_state->vb_offset; /* flush vertex cache */ if ((info->ChipFamily == CHIP_FAMILY_RV610) || @@ -88,11 +88,12 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ + accel_state->vb_size -= accel_state->vb_start_op; vtx_res.id = SQ_VTX_RESOURCE_vs; vtx_res.vtx_size_dw = 16 / 4; vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; - vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.vb_addr = accel_state->vb_mc_addr + accel_state->vb_start_op; vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); @@ -111,6 +112,7 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) accel_state->dst_size, accel_state->dst_mc_addr, accel_state->dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); + accel_state->vb_start_op = 0; R600CPFlushIndirect(pScrn, accel_state->ib); } @@ -564,6 +566,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); END_BATCH(); + accel_state->vb_start_op = accel_state->vb_offset; + 
vs_alu_consts[0] = 1.0 / pPriv->w; vs_alu_consts[1] = 1.0 / pPriv->h; vs_alu_consts[2] = 0.0; @@ -595,12 +599,14 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) int dstX, dstY, dstw, dsth; float *vb; - if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { + if ((accel_state->vb_offset + (3 * 16)) > accel_state->vb_total) { R600DoneTexturedVideo(pScrn); + if (info->cs) + radeon_cs_flush_indirect(pScrn); r600_cp_start(pScrn); } - vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); + vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_offset); dstX = pBox->x1 + dstxoff; dstY = pBox->y1 + dstyoff; @@ -632,7 +638,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) vb[10] = (float)(srcX + srcw); vb[11] = (float)(srcY + srch); - accel_state->vb_index += 3; + accel_state->vb_offset += 3 * 16; pBox++; } diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index a89bfb3f..50afaed6 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -86,6 +86,8 @@ void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) if (info->accel_state->vb_ptr) { radeon_bo_unmap(info->accel_state->vb_bo); info->accel_state->vb_ptr = NULL; + info->accel_state->vb_offset = 0; + info->accel_state->vb_start_op = 0; } if (CS_FULL(info->cs)) { radeon_cs_flush_indirect(pScrn); @@ -1183,8 +1185,8 @@ r600_vb_get(ScrnInfoPtr pScrn) accel_state->vb_total = (accel_state->ib->total / 2); accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address + (accel_state->ib->total / 2)); + accel_state->vb_offset = 0; } - accel_state->vb_index = 0; return TRUE; } diff --git a/src/radeon.h b/src/radeon.h index 0dbaa52a..745ee8e5 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -703,12 +703,13 @@ struct radeon_accel_state { Bool vsync; drmBufPtr ib; - int vb_index; + int vb_offset; uint64_t vb_mc_addr; int vb_total; void *vb_ptr; uint32_t vb_size; struct radeon_bo *vb_bo; + uint32_t vb_start_op; // shader storage ExaOffscreenArea *shaders; diff --git 
a/src/radeon_kms.c b/src/radeon_kms.c index cafc3298..b9228c11 100644 --- a/src/radeon_kms.c +++ b/src/radeon_kms.c @@ -80,8 +80,10 @@ void radeon_cs_flush_indirect(ScrnInfoPtr pScrn) return; if (info->accel_state->vb_ptr) { - radeon_bo_unmap(info->accel_state->vb_bo); - info->accel_state->vb_ptr = NULL; + radeon_bo_unmap(info->accel_state->vb_bo); + info->accel_state->vb_ptr = NULL; + info->accel_state->vb_start_op = 0; + info->accel_state->vb_offset = 0; } radeon_cs_emit(info->cs); |