From 69ec7a35e2a0a3d802ec093a6aab2d7ed2cc88be Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Tue, 25 Aug 2009 16:14:02 -0400 Subject: r6xx/r7xx: first pass at kms accel support Adapted from various patches from Dave and Jerome. --- src/r600_exa.c | 531 +++++++++++++++++++++++++++++++---------- src/r600_state.h | 50 +++- src/r600_textured_videofuncs.c | 78 ++++-- src/r6xx_accel.c | 189 +++++++++++++-- src/radeon.h | 5 + src/radeon_dri2.c | 4 +- src/radeon_exa.c | 8 +- 7 files changed, 688 insertions(+), 177 deletions(-) diff --git a/src/r600_exa.c b/src/r600_exa.c index 555748ba..03d3d8c2 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -133,7 +133,15 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) if (!R600CheckBPP(pPix->drawable.bitsPerPixel)) RADEON_FALLBACK(("R600CheckDatatype failed\n")); - accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->dst_mc_addr = 0; + accel_state->dst_bo = radeon_get_pixmap_bo(pPix); + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; + } else +#endif + accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; accel_state->dst_size = exaGetPixmapPitch(pPix) * pPix->drawable.height; accel_state->dst_pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); @@ -156,10 +164,15 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) pPix->drawable.bitsPerPixel, exaGetPixmapPitch(pPix)); #endif - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + RADEON_FALLBACK(("Can't get VB\n")); /* Init */ +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->XInited3D = FALSE; +#endif start_3d(pScrn, accel_state->ib); set_default_state(pScrn, accel_state->ib); @@ -168,10 +181,18 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) set_screen_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); set_window_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->solid_vs_offset; - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->solid_ps_offset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->vs_mc_addr = accel_state->solid_vs_offset; + accel_state->ps_mc_addr = accel_state->solid_ps_offset; + } else +#endif + { + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->solid_vs_offset; + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->solid_ps_offset; + } accel_state->vs_size = 512; accel_state->ps_size = 512; @@ -179,16 +200,19 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 2; vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 1; @@ -196,6 +220,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); /* Render setup */ @@ -216,6 +241,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) cb_conf.w = accel_state->dst_pitch; cb_conf.h = pPix->drawable.height; cb_conf.base = accel_state->dst_mc_addr; + cb_conf.bo = accel_state->dst_bo; if (pPix->drawable.bitsPerPixel == 8) { cb_conf.format = COLOR_8; @@ -296,8 +322,9 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) if (((accel_state->vb_index + 3) * 8) > accel_state->vb_total) { R600DoneSolid(pPix); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + return; } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*8); @@ -342,10 +369,12 @@ R600DoneSolid(PixmapPtr pPix) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -353,6 +382,7 @@ R600DoneSolid(PixmapPtr pPix) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); /* Draw */ @@ -368,15 +398,22 @@ R600DoneSolid(PixmapPtr pPix) /* sync dst surface */ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); R600CPFlushIndirect(pScrn, accel_state->ib); + + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; + accel_state->dst_bo = NULL; } static void R600DoPrepareCopy(ScrnInfoPtr pScrn, - int src_pitch, int src_width, int src_height, uint32_t src_offset, int src_bpp, - int dst_pitch, int dst_width, int dst_height, uint32_t dst_offset, int dst_bpp, + int src_pitch, int src_width, int src_height, + uint32_t src_offset, struct radeon_bo *src_bo, int src_bpp, + int dst_pitch, int dst_width, int dst_height, + uint32_t dst_offset, struct radeon_bo *dst_bo, int dst_bpp, int rop, Pixel planemask) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -393,8 +430,25 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, CLEAR (vs_conf); CLEAR (ps_conf); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8); + accel_state->src_mc_addr[0] = src_offset; + accel_state->src_pitch[0] = src_pitch; + accel_state->src_width[0] = src_width; + accel_state->src_height[0] = src_height; + accel_state->src_bpp[0] = src_bpp; + accel_state->src_bo[0] = src_bo; + accel_state->src_bo[1] = NULL; + + accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8); + accel_state->dst_mc_addr = dst_offset; + accel_state->dst_pitch = dst_pitch; + accel_state->dst_height = dst_height; + accel_state->dst_bpp = dst_bpp; + accel_state->dst_bo = dst_bo; + + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + return; /* Init */ start_3d(pScrn, accel_state->ib); @@ -405,10 +459,18 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, set_screen_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); set_window_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->copy_vs_offset; - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->copy_ps_offset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->vs_mc_addr = accel_state->copy_vs_offset; + accel_state->ps_mc_addr = accel_state->copy_ps_offset; + } else +#endif + { + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->copy_vs_offset; + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->copy_ps_offset; + } accel_state->vs_size = 512; accel_state->ps_size = 512; @@ -416,16 +478,19 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 2; vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 1; @@ -433,18 +498,13 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); - accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8); - accel_state->src_mc_addr[0] = src_offset; - accel_state->src_pitch[0] = src_pitch; - accel_state->src_width[0] = src_width; - accel_state->src_height[0] = src_height; - accel_state->src_bpp[0] = src_bpp; - /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->src_size[0], accel_state->src_mc_addr[0]); + accel_state->src_size[0], accel_state->src_mc_addr[0], + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); /* Texture */ tex_res.id = 0; @@ -455,6 +515,8 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, tex_res.dim = SQ_TEX_DIM_2D; tex_res.base = accel_state->src_mc_addr[0]; tex_res.mip_base = accel_state->src_mc_addr[0]; + tex_res.bo = accel_state->src_bo[0]; + tex_res.mip_bo = accel_state->src_bo[0]; if (src_bpp == 8) { tex_res.format = FMT_8; tex_res.dst_sel_x = SQ_SEL_1; /* R */ @@ -506,16 +568,11 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[rop]); END_BATCH(); - accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8); - accel_state->dst_mc_addr = dst_offset; - accel_state->dst_pitch = dst_pitch; - accel_state->dst_height = dst_height; - accel_state->dst_bpp = dst_bpp; - cb_conf.id = 0; cb_conf.w = accel_state->dst_pitch; cb_conf.h = dst_height; cb_conf.base = accel_state->dst_mc_addr; + cb_conf.bo = accel_state->dst_bo; if (dst_bpp == 8) { cb_conf.format = COLOR_8; cb_conf.comp_swap = 3; /* A */ @@ -575,10 +632,12 @@ R600DoCopy(ScrnInfoPtr pScrn) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -586,6 +645,7 @@ R600DoCopy(ScrnInfoPtr pScrn) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); draw_conf.prim_type = DI_PT_RECTLIST; @@ -600,7 +660,8 @@ R600DoCopy(ScrnInfoPtr pScrn) /* sync dst surface */ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); R600CPFlushIndirect(pScrn, accel_state->ib); } @@ -617,8 +678,9 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn, if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { R600DoCopy(pScrn); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + return; } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); @@ -663,8 +725,19 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); - accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; - accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->src_mc_addr[0] = 0; + accel_state->dst_mc_addr = 0; + accel_state->src_bo[0] = radeon_get_pixmap_bo(pSrc); + accel_state->src_bo[1] = NULL; + accel_state->dst_bo = radeon_get_pixmap_bo(pDst); + } else +#endif + { + accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; + accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + } accel_state->src_width[0] = pSrc->drawable.width; accel_state->src_height[0] = pSrc->drawable.height; @@ -701,19 +774,46 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, unsigned long size = pDst->drawable.height * accel_state->dst_pitch * pDst->drawable.bitsPerPixel/8; accel_state->same_surface = TRUE; - if (accel_state->copy_area) { - exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); - accel_state->copy_area = NULL; +#if defined(XF86DRM_MODE) + if (info->cs) { + if (accel_state->copy_area_bo) { + radeon_bo_unref(accel_state->copy_area_bo); + accel_state->copy_area_bo = NULL; + } + accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, + 4096, + RADEON_GEM_DOMAIN_VRAM, + 0); + if (accel_state->copy_area_bo == NULL) { + R600IBDiscard(pScrn, accel_state->ib); + return FALSE; + } + radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + if (radeon_cs_space_check(info->cs)) { + radeon_bo_unref(accel_state->copy_area_bo); + accel_state->copy_area_bo = NULL; + R600IBDiscard(pScrn, accel_state->ib); + return FALSE; + } + accel_state->copy_area = (void*)accel_state->copy_area_bo; + } else +#endif + { + if (accel_state->copy_area) { + exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); + accel_state->copy_area = NULL; + } + accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL); } - accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL); } else { accel_state->same_surface = FALSE; R600DoPrepareCopy(pScrn, accel_state->src_pitch[0], pSrc->drawable.width, pSrc->drawable.height, - accel_state->src_mc_addr[0], pSrc->drawable.bitsPerPixel, + accel_state->src_mc_addr[0], accel_state->src_bo[0], pSrc->drawable.bitsPerPixel, accel_state->dst_pitch, pDst->drawable.width, pDst->drawable.height, - accel_state->dst_mc_addr, pDst->drawable.bitsPerPixel, + accel_state->dst_mc_addr, accel_state->dst_bo, pDst->drawable.bitsPerPixel, rop, planemask); } @@ -745,6 +845,17 @@ R600OverlapCopy(PixmapPtr pDst, uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); uint32_t dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; int i, hchunk, vchunk; + struct radeon_bo *dst_bo = NULL; + +#if defined(XF86DRM_MODE) + if (info->cs) { + dst_offset = 0; + dst_bo = radeon_get_pixmap_bo(pDst); + radeon_cs_space_add_persistent_bo(info->cs, dst_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_check(info->cs); + } +#endif if (is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) { @@ -760,8 +871,10 @@ R600OverlapCopy(PixmapPtr pDst, if ((w / hchunk) <= (h / vchunk)) { /* reduce to horizontal */ if (srcY > dstY ) { /* diagonal up */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, vchunk); R600DoCopy(pScrn); @@ -770,8 +883,10 @@ R600OverlapCopy(PixmapPtr pDst, dstY = dstY + vchunk; } else { /* diagonal down */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY + h - vchunk, dstX, dstY + h - vchunk, w, vchunk); R600DoCopy(pScrn); @@ -781,8 +896,10 @@ R600OverlapCopy(PixmapPtr pDst, } else { /* reduce to vertical */ if (srcX > dstX ) { /* diagonal left */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, hchunk, h); R600DoCopy(pScrn); @@ -791,8 +908,10 @@ R600OverlapCopy(PixmapPtr pDst, dstX = dstX + hchunk; } else { /* diagonal right */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX + w - hchunk, srcY, dstX + w - hchunk, dstY, hchunk, h); R600DoCopy(pScrn); @@ -807,8 +926,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy right to left */ for (i = w; i > 0; i -= hchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h); R600DoCopy(pScrn); @@ -817,8 +938,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy left to right */ for (i = 0; i < w; i += hchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, hchunk, h); @@ -830,8 +953,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy top to bottom */ for (i = 0; i < h; i += vchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); if (vchunk > h - i) vchunk = h - i; @@ -842,8 +967,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy bottom to top */ for (i = h; i > 0; i -= vchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); if (vchunk > i) vchunk = i; @@ -854,8 +981,10 @@ R600OverlapCopy(PixmapPtr pDst, } } else { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); @@ -872,27 +1001,45 @@ R600Copy(PixmapPtr pDst, ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; + struct radeon_bo *bo = NULL; if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY)) return; - if (accel_state->same_surface && is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) { +#if defined(XF86DRM_MODE) + if (info->cs) + bo = radeon_get_pixmap_bo(pDst); +#endif + + if (accel_state->same_surface && + is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) { if (accel_state->copy_area) { uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); uint32_t orig_offset, tmp_offset; - tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset; - orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; - +#if defined(XF86DRM_MODE) + if (info->cs) { + tmp_offset = 0; + orig_offset = 0; + } else +#endif + { + tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset; + orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + } R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, - pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + orig_offset, bo, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + tmp_offset, accel_state->copy_area_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); R600DoCopy(pScrn); R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, - pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + tmp_offset, accel_state->copy_area_bo, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + orig_offset, bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h); R600DoCopy(pScrn); @@ -900,11 +1047,20 @@ R600Copy(PixmapPtr pDst, R600OverlapCopy(pDst, srcX, srcY, dstX, dstY, w, h); } else if (accel_state->same_surface) { uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); - uint32_t offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + uint32_t offset; + +#if defined(XF86DRM_MODE) + if (info->cs) + offset = 0; + else +#endif + offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, - pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + offset, bo, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + offset, bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); R600DoCopy(pScrn); @@ -925,10 +1081,13 @@ R600DoneCopy(PixmapPtr pDst) R600DoCopy(pScrn); if (accel_state->copy_area) { - exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); + if (!info->cs) + exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); accel_state->copy_area = NULL; } - + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; + accel_state->dst_bo = NULL; } @@ -1103,7 +1262,16 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, CLEAR (tex_res); CLEAR (tex_samp); - accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->src_mc_addr[unit] = 0; + accel_state->src_bo[unit] = radeon_get_pixmap_bo(pPix); + radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_bo[unit], + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_check(info->cs); + } else +#endif + accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; accel_state->src_pitch[unit] = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); accel_state->src_size[unit] = exaGetPixmapPitch(pPix) * pPix->drawable.height; @@ -1122,7 +1290,8 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->src_size[unit], accel_state->src_mc_addr[unit]); + accel_state->src_size[unit], accel_state->src_mc_addr[unit], + accel_state->src_bo[unit], RADEON_GEM_DOMAIN_VRAM, 0); /* Texture */ tex_res.id = unit; @@ -1134,6 +1303,8 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, tex_res.base = accel_state->src_mc_addr[unit]; tex_res.mip_base = accel_state->src_mc_addr[unit]; tex_res.format = R600TexFormats[i].card_fmt; + tex_res.bo = accel_state->src_bo[unit]; + tex_res.mip_bo = accel_state->src_bo[unit]; tex_res.request_size = 1; /* component swizzles */ @@ -1406,6 +1577,9 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, /* return FALSE; */ + if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8) + return FALSE; + if (pMask) { accel_state->msk_pic = pMaskPicture; if (pMaskPicture->componentAlpha) { @@ -1424,7 +1598,13 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, accel_state->src_alpha = FALSE; } - accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->dst_mc_addr = 0; + accel_state->dst_bo = radeon_get_pixmap_bo(pDst); + } else +#endif + accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); accel_state->dst_size = exaGetPixmapPitch(pDst) * pDst->drawable.height; @@ -1441,8 +1621,9 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, CLEAR (vs_conf); CLEAR (ps_conf); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + RADEON_FALLBACK(("Can't get VB\n")); /* Init */ start_3d(pScrn, accel_state->ib); @@ -1470,16 +1651,31 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, if (pMask) { set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0)); - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->comp_mask_ps_offset; +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->ps_mc_addr = accel_state->comp_mask_ps_offset; + else +#endif + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->comp_mask_ps_offset; } else { set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0)); - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->ps_mc_addr = accel_state->comp_ps_offset; + else +#endif + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + accel_state->comp_ps_offset; } - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->comp_vs_offset; +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->vs_mc_addr = accel_state->comp_vs_offset; + else +#endif + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->comp_vs_offset; accel_state->vs_size = 512; accel_state->ps_size = 512; @@ -1488,16 +1684,19 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 3; vs_conf.stack_size = 1; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 3; @@ -1505,6 +1704,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); BEGIN_BATCH(12); @@ -1529,6 +1729,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, cb_conf.h = pDst->drawable.height; cb_conf.base = accel_state->dst_mc_addr; cb_conf.format = dst_format; + cb_conf.bo = accel_state->dst_bo; switch (pDstPicture->format) { case PICT_a8r8g8b8: @@ -1611,8 +1812,9 @@ static void R600Composite(PixmapPtr pDst, if (((accel_state->vb_index + 3) * 24) > accel_state->vb_total) { R600DoneComposite(pDst); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + return; } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*24); @@ -1650,8 +1852,9 @@ static void R600Composite(PixmapPtr pDst, } else { if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { R600DoneComposite(pDst); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + return; } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); @@ -1701,6 +1904,7 @@ static void R600DoneComposite(PixmapPtr pDst) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; } else { accel_state->vb_size = accel_state->vb_index * 16; vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -1708,6 +1912,7 @@ static void R600DoneComposite(PixmapPtr pDst) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; } /* flush vertex cache */ if ((info->ChipFamily == CHIP_FAMILY_RV610) || @@ -1716,12 +1921,14 @@ static void R600DoneComposite(PixmapPtr pDst) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); - set_vtx_resource (pScrn, accel_state->ib, &vtx_res); + set_vtx_resource(pScrn, accel_state->ib, &vtx_res); draw_conf.prim_type = DI_PT_RECTLIST; draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; @@ -1734,9 +1941,14 @@ static void R600DoneComposite(PixmapPtr pDst) wait_3d_idle_clean(pScrn, accel_state->ib); cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); R600CPFlushIndirect(pScrn, accel_state->ib); + + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; + accel_state->dst_bo = NULL; } Bool @@ -1753,6 +1965,7 @@ R600CopyToVRAM(ScrnInfoPtr pScrn, int scratch_offset = 0, hpass, temph; char *dst; drmBufPtr scratch; + struct radeon_bo *bo = NULL; if (dst_pitch & 7) return FALSE; @@ -1795,8 +2008,10 @@ R600CopyToVRAM(ScrnInfoPtr pScrn, } /* blit from scratch to vram */ R600DoPrepareCopy(pScrn, - scratch_pitch, w, oldhpass, offset, bpp, - dst_pitch, dst_width, dst_height, dst_mc_addr, bpp, + scratch_pitch, w, oldhpass, + offset, bo, bpp, + dst_pitch, dst_width, dst_height, + dst_mc_addr, bo, bpp, 3, 0xffffffff); R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass); R600DoCopy(pScrn); @@ -1842,6 +2057,7 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8); int wpass = w * (bpp/8); drmBufPtr scratch; + struct radeon_bo *bo = NULL; /* RV740 seems to be particularly problematic with small xfers */ if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32)) @@ -1859,8 +2075,10 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, /* blit from vram to scratch */ R600DoPrepareCopy(pScrn, - src_pitch, src_width, src_height, src_mc_addr, bpp, - scratch_pitch, src_width, hpass, scratch_mc_addr, bpp, + src_pitch, src_width, src_height, + src_mc_addr, bo, bpp, + scratch_pitch, src_width, hpass, + scratch_mc_addr, bo, bpp, 3, 0xffffffff); R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); R600DoCopy(pScrn); @@ -1876,8 +2094,10 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, scratch_offset = scratch->total/2 - scratch_offset; /* blit from vram to scratch */ R600DoPrepareCopy(pScrn, - src_pitch, src_width, src_height, src_mc_addr, bpp, - scratch_pitch, src_width, hpass, scratch_mc_addr + scratch_offset, bpp, + src_pitch, src_width, src_height, + src_mc_addr, bo, bpp, + scratch_pitch, src_width, hpass, + scratch_mc_addr + scratch_offset, bo, bpp, 3, 0xffffffff); R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); R600DoCopy(pScrn); @@ -1919,7 +2139,12 @@ R600Sync(ScreenPtr pScreen, int marker) struct radeon_accel_state *accel_state = info->accel_state; if (accel_state->exaMarkerSynced != marker) { - RADEONWaitForIdleCP(pScrn); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (!info->cs) +#endif +#endif + RADEONWaitForIdleCP(pScrn); accel_state->exaMarkerSynced = marker; } @@ -1936,11 +2161,27 @@ R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) accel_state->shaders = NULL; - accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256, - TRUE, NULL, NULL); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (info->cs) { + accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 4096, + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->shaders_bo == NULL) { + ErrorF("Allocating shader failed\n"); + return FALSE; + } + return TRUE; + } else +#endif +#endif + { + accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256, + TRUE, NULL, NULL); + + if (accel_state->shaders == NULL) + return FALSE; + } - if (accel_state->shaders == NULL) - return FALSE; return TRUE; } @@ -1951,8 +2192,21 @@ R600LoadShaders(ScrnInfoPtr pScrn) struct radeon_accel_state *accel_state = info->accel_state; RADEONChipFamily ChipSet = info->ChipFamily; uint32_t *shader; - - shader = (pointer)((char *)info->FB + accel_state->shaders->offset); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + int ret; + + if (info->cs) { + ret = radeon_bo_map(accel_state->shaders_bo, 1); + if (ret) { + FatalError("failed to map shader %d\n", ret); + return FALSE; + } + shader = accel_state->shaders_bo->ptr; + } else +#endif +#endif + shader = (pointer)((char *)info->FB + accel_state->shaders->offset); /* solid vs --------------------------------------- */ accel_state->solid_vs_offset = 0; @@ -1990,6 +2244,14 @@ R600LoadShaders(ScrnInfoPtr pScrn) accel_state->xv_ps_offset = 4096; R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (info->cs) { + radeon_bo_unmap(accel_state->shaders_bo); + } +#endif +#endif + return TRUE; } @@ -2018,7 +2280,6 @@ R600FinishAccess(PixmapPtr pPix, int index) } - Bool R600DrawInit(ScreenPtr pScreen) { @@ -2044,13 +2305,28 @@ R600DrawInit(ScreenPtr pScreen) info->accel_state->exa->MarkSync = R600MarkSync; info->accel_state->exa->WaitMarker = R600Sync; - info->accel_state->exa->PrepareAccess = R600PrepareAccess; - info->accel_state->exa->FinishAccess = R600FinishAccess; - - /* AGP seems to have problems with gart transfers */ - if (info->accelDFS) { - info->accel_state->exa->UploadToScreen = R600UploadToScreen; - info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen; +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (info->cs) { + info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap; + info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap; + info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen; + info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS; + info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS; + info->accel_state->exa->UploadToScreen = NULL; + info->accel_state->exa->DownloadFromScreen = NULL; + } else +#endif +#endif + { + info->accel_state->exa->PrepareAccess = R600PrepareAccess; + info->accel_state->exa->FinishAccess = R600FinishAccess; + + /* AGP seems to have problems with gart transfers */ + if (info->accelDFS) { + info->accel_state->exa->UploadToScreen = R600UploadToScreen; + info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen; + } } info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; @@ -2083,8 +2359,13 @@ R600DrawInit(ScreenPtr pScreen) return FALSE; } - if (!info->gartLocation) - return FALSE; +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (!info->cs) +#endif +#endif + if (!info->gartLocation) + return FALSE; info->accel_state->XInited3D = FALSE; info->accel_state->copy_area = NULL; diff --git a/src/r600_state.h b/src/r600_state.h index 10b1022a..6ca88cf4 100644 --- a/src/r600_state.h +++ b/src/r600_state.h @@ -50,6 +50,7 @@ typedef struct { int round_mode; int tile_compact; int source_format; + struct radeon_bo *bo; } cb_config_t; /* Depth buffer */ @@ -63,6 +64,7 @@ typedef struct { int tile_surface_en; int tile_compact; int zrange_precision; + struct radeon_bo *bo; } db_config_t; /* Shader */ @@ -79,6 +81,7 @@ typedef struct { int clamp_consts; int export_mode; int uncached_first_inst; + struct radeon_bo *bo; } shader_config_t; /* Vertex buffer / vtx resource */ @@ -94,6 +97,7 @@ typedef struct { int srf_mode_all; int endian; int mem_req_size; + struct radeon_bo *bo; } vtx_resource_t; /* Texture resource */ @@ -129,6 +133,8 @@ typedef struct { int mpeg_clamp; int perf_modulation; int interlaced; + struct radeon_bo *bo; + struct radeon_bo *mip_bo; } tex_resource_t; /* Texture sampler */ @@ -170,15 +176,43 @@ typedef struct { uint32_t num_indices; } draw_config_t; +#if defined(XF86DRM_MODE) +#define BEGIN_BATCH(n) \ +do { \ + if (info->cs) \ + radeon_ddx_cs_start(pScrn, (n), __FILE__, __func__, __LINE__); \ +} while(0) +#define END_BATCH() \ +do { \ + if (info->cs) \ + radeon_cs_end(info->cs, __FILE__, __func__, __LINE__); \ +} while(0) +#define RELOC_BATCH(bo, rd, wd) \ +do { \ + if (info->cs) \ + OUT_RING_RELOC((bo), (rd), (wd)); \ +} while(0) +#define E32(ib, dword) \ +do { \ + if (info->cs) \ + radeon_cs_write_dword(info->cs, (dword)); \ + else { \ + uint32_t *ib_head = (pointer)(char*)(ib)->address; \ + ib_head[(ib)->used >> 2] = (dword); \ + (ib)->used += 4; \ + } \ +} while (0) +#else #define BEGIN_BATCH(n) do {} while(0) #define END_BATCH() do {} while(0) - +#define RELOC_BATCH(bo, wd, rd) do {} while(0) #define E32(ib, dword) \ do { \ uint32_t *ib_head = (pointer)(char*)(ib)->address; \ ib_head[(ib)->used >> 2] = (dword); \ (ib)->used += 4; \ } while (0) +#endif #define EFLOAT(ib, val) \ do { \ @@ -246,7 +280,8 @@ start_3d(ScrnInfoPtr pScrn, drmBufPtr ib); void set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf); void -cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr); +cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr, + struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain); void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, int crtc, int start, int stop); void @@ -282,9 +317,18 @@ draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *i void draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf); -void +Bool r600_vb_get(ScrnInfoPtr pScrn); void r600_vb_discard(ScrnInfoPtr pScrn); +int +r600_cp_start(ScrnInfoPtr pScrn); + +extern Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index); +extern void RADEONFinishAccess_CS(PixmapPtr pPix, int index); +extern void *RADEONEXACreatePixmap(ScreenPtr pScreen, int size, int align); +extern void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv); +extern struct radeon_bo *radeon_get_pixmap_bo(PixmapPtr pPix); +extern Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix); #endif diff --git a/src/r600_textured_videofuncs.c b/src/r600_textured_videofuncs.c index 67396162..9cbfea43 100644 --- a/src/r600_textured_videofuncs.c +++ b/src/r600_textured_videofuncs.c @@ -80,10 +80,12 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -91,6 +93,7 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); draw_conf.prim_type = DI_PT_RECTLIST; @@ -105,9 +108,13 @@ R600DoneTexturedVideo(ScrnInfoPtr pScrn) /* sync destination surface */ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); R600CPFlushIndirect(pScrn, accel_state->ib); + accel_state->dst_bo = NULL; + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; } void @@ -216,6 +223,19 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) CLEAR (vs_conf); CLEAR (ps_conf); +#if defined(ACCEL_CP) && defined(XF86DRM_MODE) + if (info->cs) { + accel_state->dst_mc_addr = 0; + accel_state->src_mc_addr[0] = 0; + accel_state->src_bo[0] = pPriv->src_bo; + accel_state->src_bo[1] = NULL; + accel_state->dst_bo = radeon_get_pixmap_bo(pPixmap); + } else +#endif + { + accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; + accel_state->src_mc_addr[0] = pPriv->src_offset + info->fbLocation + pScrn->fbOffset; + } accel_state->dst_pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8); accel_state->src_pitch[0] = pPriv->src_pitch; @@ -233,8 +253,9 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) dstyoff = 0; #endif - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); + if (!r600_vb_get(pScrn)) + return; /* Init */ start_3d(pScrn, accel_state->ib); @@ -245,11 +266,18 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) set_screen_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); set_window_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->xv_vs_offset; - - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->xv_ps_offset; +#if defined(ACCEL_CP) && defined(XF86DRM_MODE) + if (info->cs) { + accel_state->vs_mc_addr = accel_state->xv_vs_offset; + accel_state->ps_mc_addr = accel_state->xv_ps_offset; + } else +#endif + { + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->xv_vs_offset; + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->xv_ps_offset; + } /* PS bool constant */ switch(pPriv->id) { @@ -271,16 +299,19 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 2; vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 3; @@ -288,6 +319,7 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); /* PS alu constants */ @@ -298,12 +330,12 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) switch(pPriv->id) { case FOURCC_YV12: case FOURCC_I420: - accel_state->src_mc_addr[0] = pPriv->src_offset + info->fbLocation + pScrn->fbOffset; accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], - accel_state->src_mc_addr[0]); + accel_state->src_mc_addr[0], + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); /* Y texture */ tex_res.id = 0; @@ -314,6 +346,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_res.dim = SQ_TEX_DIM_2D; tex_res.base = accel_state->src_mc_addr[0]; tex_res.mip_base = accel_state->src_mc_addr[0]; + tex_res.bo = accel_state->src_bo[0]; + tex_res.mip_bo = accel_state->src_bo[0]; tex_res.format = FMT_8; tex_res.dst_sel_x = SQ_SEL_X; /* Y */ @@ -345,7 +379,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) /* U or V texture */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0] / 4, - accel_state->src_mc_addr[0] + pPriv->planev_offset); + accel_state->src_mc_addr[0] + pPriv->planev_offset, + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); tex_res.id = 1; tex_res.format = FMT_8; @@ -369,7 +404,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) /* U or V texture */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0] / 4, - accel_state->src_mc_addr[0] + pPriv->planeu_offset); + accel_state->src_mc_addr[0] + pPriv->planeu_offset, + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); tex_res.id = 2; tex_res.format = FMT_8; @@ -393,12 +429,12 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) case FOURCC_UYVY: case FOURCC_YUY2: default: - accel_state->src_mc_addr[0] = pPriv->src_offset + info->fbLocation + pScrn->fbOffset; accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], - accel_state->src_mc_addr[0]); + accel_state->src_mc_addr[0], + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); /* Y texture */ tex_res.id = 0; @@ -409,6 +445,8 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) tex_res.dim = SQ_TEX_DIM_2D; tex_res.base = accel_state->src_mc_addr[0]; tex_res.mip_base = accel_state->src_mc_addr[0]; + tex_res.bo = accel_state->src_bo[0]; + tex_res.mip_bo = accel_state->src_bo[0]; tex_res.format = FMT_8_8; if (pPriv->id == FOURCC_UYVY) @@ -474,12 +512,10 @@ R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) END_BATCH(); cb_conf.id = 0; - - accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; - cb_conf.w = accel_state->dst_pitch; cb_conf.h = pPixmap->drawable.height; cb_conf.base = accel_state->dst_mc_addr; + cb_conf.bo = accel_state->dst_bo; switch (pPixmap->drawable.bitsPerPixel) { case 16: diff --git a/src/r6xx_accel.c b/src/r6xx_accel.c index 059c3cca..4aa46504 100644 --- a/src/r6xx_accel.c +++ b/src/r6xx_accel.c @@ -39,6 +39,21 @@ #include "radeon_drm.h" +void r600_cs_flush_indirect(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + int ret; + + if (!info->cs->cdw) + return; + radeon_cs_emit(info->cs); + radeon_cs_erase(info->cs); + + ret = radeon_cs_space_check(info->cs); + if (ret) + ErrorF("space check failed in flush\n"); +} + /* Flush the indirect buffer to the kernel for submission to the card */ void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) { @@ -47,13 +62,20 @@ void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) int start = 0; drm_radeon_indirect_t indirect; +#if defined(XF86DRM_MODE) + if (info->cs) { + r600_cs_flush_indirect(pScrn); + return; + } +#endif + if (!buffer) return; //xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Flushing buffer %d\n", // buffer->idx); while (buffer->used & 0x3c){ - BEGIN_BATCH(); + BEGIN_BATCH(1); E32(buffer, CP_PACKET2()); /* fill up to multiple of 16 dwords */ END_BATCH(); } @@ -72,6 +94,20 @@ void R600CPFlushIndirect(ScrnInfoPtr pScrn, drmBufPtr ib) void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) { +#if defined(XF86DRM_MODE) + int ret; + RADEONInfoPtr info = RADEONPTR(pScrn); + if (info->cs) { + if (CS_FULL(info->cs)) { + r600_cs_flush_indirect(pScrn); + return; + } + radeon_cs_erase(info->cs); + ret = radeon_cs_space_check(info->cs); + if (ret) + ErrorF("space check failed in flush\n"); + } +#endif if (!ib) return; ib->used = 0; @@ -81,6 +117,7 @@ void R600IBDiscard(ScrnInfoPtr pScrn, drmBufPtr ib) void wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) { + RADEONInfoPtr info = RADEONPTR(pScrn); //flush caches, don't generate timestamp BEGIN_BATCH(5); @@ -95,6 +132,8 @@ wait_3d_idle_clean(ScrnInfoPtr pScrn, drmBufPtr ib) void wait_3d_idle(ScrnInfoPtr pScrn, drmBufPtr ib) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(3); EREG(ib, WAIT_UNTIL, WAIT_3D_IDLE_bit); END_BATCH(); @@ -212,19 +251,19 @@ set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf) h = (cb_conf->h + 7) & ~7; slice = ((cb_conf->w * h) / 64) - 1; - if ((info->ChipFamily > CHIP_FAMILY_R600) && - (info->ChipFamily < CHIP_FAMILY_RV770)) - BEGIN_BATCH(23); - else - BEGIN_BATCH(21); + BEGIN_BATCH(3 + 2); EREG(ib, (CB_COLOR0_BASE + (4 * cb_conf->id)), (cb_conf->base >> 8)); + RELOC_BATCH(cb_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); // rv6xx workaround if ((info->ChipFamily > CHIP_FAMILY_R600) && (info->ChipFamily < CHIP_FAMILY_RV770)) { + BEGIN_BATCH(20); PACK3(ib, IT_SURFACE_BASE_UPDATE, 1); E32(ib, (2 << cb_conf->id)); - } + } else + BEGIN_BATCH(18); // pitch only for ARRAY_LINEAR_GENERAL, other tiling modes require addrlib EREG(ib, (CB_COLOR0_SIZE + (4 * cb_conf->id)), ((pitch << PITCH_TILE_MAX_shift) | @@ -240,20 +279,23 @@ set_render_target(ScrnInfoPtr pScrn, drmBufPtr ib, cb_config_t *cb_conf) } void -cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr) +cp_set_surface_sync(ScrnInfoPtr pScrn, drmBufPtr ib, uint32_t sync_type, uint32_t size, uint64_t mc_addr, + struct radeon_bo *bo, uint32_t rdomains, uint32_t wdomain) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t cp_coher_size; if (size == 0xffffffff) cp_coher_size = 0xffffffff; else cp_coher_size = ((size + 255) >> 8); - BEGIN_BATCH(5); + BEGIN_BATCH(5 + 2); PACK3(ib, IT_SURFACE_SYNC, 4); E32(ib, sync_type); E32(ib, cp_coher_size); E32(ib, (mc_addr >> 8)); E32(ib, 10); /* poll interval */ + RELOC_BATCH(bo, rdomains, wdomain); END_BATCH(); } @@ -266,6 +308,12 @@ void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, uint32_t offset; RADEONCrtcPrivatePtr radeon_crtc; + //XXX FIXME +#if defined(XF86DRM_MODE) + if (info->cs) + return; +#endif + if ((crtc < 0) || (crtc > 1)) return; @@ -314,6 +362,7 @@ void cp_wait_vline_sync(ScrnInfoPtr pScrn, drmBufPtr ib, PixmapPtr pPix, void fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_pgm_resources; sq_pgm_resources = ((fs_conf->num_gprs << NUM_GPRS_shift) | @@ -322,8 +371,12 @@ fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf) if (fs_conf->dx10_clamp) sq_pgm_resources |= SQ_PGM_RESOURCES_FS__DX10_CLAMP_bit; - BEGIN_BATCH(9); + BEGIN_BATCH(3 + 2); EREG(ib, SQ_PGM_START_FS, fs_conf->shader_addr >> 8); + RELOC_BATCH(fs_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(6); EREG(ib, SQ_PGM_RESOURCES_FS, sq_pgm_resources); EREG(ib, SQ_PGM_CF_OFFSET_FS, 0); END_BATCH(); @@ -332,6 +385,7 @@ fs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *fs_conf) void vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_pgm_resources; sq_pgm_resources = ((vs_conf->num_gprs << NUM_GPRS_shift) | @@ -344,8 +398,12 @@ vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf) if (vs_conf->uncached_first_inst) sq_pgm_resources |= UNCACHED_FIRST_INST_bit; - BEGIN_BATCH(9); + BEGIN_BATCH(3 + 2); EREG(ib, SQ_PGM_START_VS, vs_conf->shader_addr >> 8); + RELOC_BATCH(vs_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(6); EREG(ib, SQ_PGM_RESOURCES_VS, sq_pgm_resources); EREG(ib, SQ_PGM_CF_OFFSET_VS, 0); END_BATCH(); @@ -354,6 +412,7 @@ vs_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *vs_conf) void ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_pgm_resources; sq_pgm_resources = ((ps_conf->num_gprs << NUM_GPRS_shift) | @@ -368,8 +427,12 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf) if (ps_conf->clamp_consts) sq_pgm_resources |= CLAMP_CONSTS_bit; - BEGIN_BATCH(12); + BEGIN_BATCH(3 + 2); EREG(ib, SQ_PGM_START_PS, ps_conf->shader_addr >> 8); + RELOC_BATCH(ps_conf->bo, RADEON_GEM_DOMAIN_VRAM, 0); + END_BATCH(); + + BEGIN_BATCH(9); EREG(ib, SQ_PGM_RESOURCES_PS, sq_pgm_resources); EREG(ib, SQ_PGM_EXPORTS_PS, ps_conf->export_mode); EREG(ib, SQ_PGM_CF_OFFSET_PS, 0); @@ -379,10 +442,11 @@ ps_setup(ScrnInfoPtr pScrn, drmBufPtr ib, shader_config_t *ps_conf) void set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *const_buf) { + RADEONInfoPtr info = RADEONPTR(pScrn); int i; const int countreg = count * (SQ_ALU_CONSTANT_offset >> 2); - BEGIN_BATCH(2 + count_reg); + BEGIN_BATCH(2 + countreg); PACK0(ib, SQ_ALU_CONSTANT + offset * SQ_ALU_CONSTANT_offset, countreg); for (i = 0; i < countreg; i++) EFLOAT(ib, const_buf[i]); @@ -392,6 +456,7 @@ set_alu_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, int count, float *co void set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val) { + RADEONInfoPtr info = RADEONPTR(pScrn); /* bool register order is: ps, vs, gs; one register each * 1 bits per bool; 32 bools each for ps, vs, gs. */ @@ -403,6 +468,7 @@ set_bool_consts(ScrnInfoPtr pScrn, drmBufPtr ib, int offset, uint32_t val) void set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_vtx_constant_word2; sq_vtx_constant_word2 = ((((res->vb_addr) >> 32) & BASE_ADDRESS_HI_mask) | @@ -419,7 +485,7 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) if (res->srf_mode_all) sq_vtx_constant_word2 |= SQ_VTX_CONSTANT_WORD2_0__SRF_MODE_ALL_bit; - BEGIN_BATCH(9); + BEGIN_BATCH(9 + 2); PACK0(ib, SQ_VTX_RESOURCE + res->id * SQ_VTX_RESOURCE_offset, 7); E32(ib, res->vb_addr & 0xffffffff); // 0: BASE_ADDRESS E32(ib, (res->vtx_num_entries << 2) - 1); // 1: SIZE @@ -428,12 +494,14 @@ set_vtx_resource(ScrnInfoPtr pScrn, drmBufPtr ib, vtx_resource_t *res) E32(ib, 0); // 4: n/a E32(ib, 0); // 5: n/a E32(ib, SQ_TEX_VTX_VALID_BUFFER << SQ_VTX_CONSTANT_WORD6_0__TYPE_shift); // 6: TYPE + RELOC_BATCH(res->bo, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); } void set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_tex_resource_word0, sq_tex_resource_word1, sq_tex_resource_word4; uint32_t sq_tex_resource_word5, sq_tex_resource_word6; @@ -483,7 +551,7 @@ set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) if (tex_res->interlaced) sq_tex_resource_word6 |= INTERLACED_bit; - BEGIN_BATCH(9); + BEGIN_BATCH(9 + 4); PACK0(ib, SQ_TEX_RESOURCE + tex_res->id * SQ_TEX_RESOURCE_offset, 7); E32(ib, sq_tex_resource_word0); E32(ib, sq_tex_resource_word1); @@ -492,12 +560,15 @@ set_tex_resource(ScrnInfoPtr pScrn, drmBufPtr ib, tex_resource_t *tex_res) E32(ib, sq_tex_resource_word4); E32(ib, sq_tex_resource_word5); E32(ib, sq_tex_resource_word6); + RELOC_BATCH(tex_res->bo, RADEON_GEM_DOMAIN_VRAM, 0); + RELOC_BATCH(tex_res->mip_bo, RADEON_GEM_DOMAIN_VRAM, 0); END_BATCH(); } void set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t sq_tex_sampler_word0, sq_tex_sampler_word1, sq_tex_sampler_word2; sq_tex_sampler_word0 = ((s->clamp_x << SQ_TEX_SAMPLER_WORD0_0__CLAMP_X_shift) | @@ -549,6 +620,8 @@ set_tex_sampler (ScrnInfoPtr pScrn, drmBufPtr ib, tex_sampler_t *s) void set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(6); EREG(ib, PA_SC_SCREEN_SCISSOR_TL, ((x1 << PA_SC_SCREEN_SCISSOR_TL__TL_X_shift) | (y1 << PA_SC_SCREEN_SCISSOR_TL__TL_Y_shift))); @@ -560,6 +633,7 @@ set_screen_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int void set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); BEGIN_BATCH(6); EREG(ib, PA_SC_VPORT_SCISSOR_0_TL + @@ -575,6 +649,7 @@ set_vport_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x void set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); BEGIN_BATCH(6); EREG(ib, PA_SC_GENERIC_SCISSOR_TL, ((x1 << PA_SC_GENERIC_SCISSOR_TL__TL_X_shift) | @@ -588,6 +663,8 @@ set_generic_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int void set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(6); EREG(ib, PA_SC_WINDOW_SCISSOR_TL, ((x1 << PA_SC_WINDOW_SCISSOR_TL__TL_X_shift) | (y1 << PA_SC_WINDOW_SCISSOR_TL__TL_Y_shift) | @@ -600,6 +677,8 @@ set_window_scissor(ScrnInfoPtr pScrn, drmBufPtr ib, int x1, int y1, int x2, int void set_clip_rect(ScrnInfoPtr pScrn, drmBufPtr ib, int id, int x1, int y1, int x2, int y2) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(6); EREG(ib, PA_SC_CLIPRECT_0_TL + id * PA_SC_CLIPRECT_0_TL_offset, ((x1 << PA_SC_CLIPRECT_0_TL__TL_X_shift) | @@ -966,6 +1045,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) END_BATCH(); // clear FS + fs_conf.bo = accel_state->shaders_bo; fs_setup(pScrn, ib, &fs_conf); // VGT @@ -1006,6 +1086,7 @@ set_default_state(ScrnInfoPtr pScrn, drmBufPtr ib) void draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *indices) { + RADEONInfoPtr info = RADEONPTR(pScrn); uint32_t i, count; // calculate num of packets @@ -1043,6 +1124,8 @@ draw_immd(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf, uint32_t *i void draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) { + RADEONInfoPtr info = RADEONPTR(pScrn); + BEGIN_BATCH(10); EREG(ib, VGT_PRIMITIVE_TYPE, draw_conf->prim_type); PACK3(ib, IT_INDEX_TYPE, 1); @@ -1055,22 +1138,82 @@ draw_auto(ScrnInfoPtr pScrn, drmBufPtr ib, draw_config_t *draw_conf) END_BATCH(); } -void +Bool r600_vb_get(ScrnInfoPtr pScrn) { RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; - - accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + - (accel_state->ib->idx * accel_state->ib->total) + - (accel_state->ib->total / 2); - accel_state->vb_total = (accel_state->ib->total / 2); - accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address + - (accel_state->ib->total / 2)); +#if defined(XF86DRM_MODE) + int ret; + if (info->cs) { + if (accel_state->vb_bo == NULL) { + accel_state->vb_mc_addr = 0; + accel_state->vb_bo = radeon_bo_open(info->bufmgr, 0, 16 * 1024, + 4096, RADEON_GEM_DOMAIN_GTT, 0); + if (accel_state->vb_bo == NULL) + return FALSE; + ret = radeon_bo_map(accel_state->vb_bo, 1); + if (ret) { + FatalError("failed to vb %d\n", ret); + return FALSE; + } + accel_state->vb_total = 16 * 1024; + accel_state->vb_ptr = accel_state->vb_bo->ptr; + } + } else +#endif + { + accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + + (accel_state->ib->idx*accel_state->ib->total)+ + (accel_state->ib->total / 2); + accel_state->vb_total = (accel_state->ib->total / 2); + accel_state->vb_ptr = (pointer)((char*)accel_state->ib->address + + (accel_state->ib->total / 2)); + } accel_state->vb_index = 0; + return TRUE; } void r600_vb_discard(ScrnInfoPtr pScrn) { } + +int +r600_cp_start(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + +#if defined(XF86DRM_MODE) + if (info->cs) { + if (!r600_vb_get(pScrn)) + return FALSE; + radeon_cs_space_reset_bos(info->cs); + radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->src_bo[0]) + radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_bo[0], + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->src_bo[1]) + radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_bo[1], + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_add_persistent_bo(info->cs, accel_state->vb_bo, + RADEON_GEM_DOMAIN_GTT, 0); + if (accel_state->copy_area_bo) + radeon_cs_space_add_persistent_bo(info->cs, + accel_state->copy_area_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_check(info->cs); + } else +#endif + { + accel_state->ib = RADEONCPGetBuffer(pScrn); + if (!r600_vb_get(pScrn)) { + return -1; + } + } + return 0; +} diff --git a/src/radeon.h b/src/radeon.h index 3a3631e2..7fdd8f5e 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -696,9 +696,11 @@ struct radeon_accel_state { int vb_total; void *vb_ptr; uint32_t vb_size; + struct radeon_bo *vb_bo; // shader storage ExaOffscreenArea *shaders; + struct radeon_bo *shaders_bo; uint32_t solid_vs_offset; uint32_t solid_ps_offset; uint32_t copy_vs_offset; @@ -710,12 +712,14 @@ struct radeon_accel_state { uint32_t xv_ps_offset; //size/addr stuff + struct radeon_bo *src_bo[2]; uint32_t src_size[2]; uint64_t src_mc_addr[2]; uint32_t src_pitch[2]; uint32_t src_width[2]; uint32_t src_height[2]; uint32_t src_bpp[2]; + struct radeon_bo *dst_bo; uint32_t dst_size; uint64_t dst_mc_addr; uint32_t dst_pitch; @@ -731,6 +735,7 @@ struct radeon_accel_state { // copy ExaOffscreenArea *copy_area; + struct radeon_bo *copy_area_bo; Bool same_surface; int rop; uint32_t planemask; diff --git a/src/radeon_dri2.c b/src/radeon_dri2.c index b52f9659..efc6bdef 100644 --- a/src/radeon_dri2.c +++ b/src/radeon_dri2.c @@ -333,7 +333,9 @@ radeon_dri2_screen_init(ScreenPtr pScreen) return FALSE; } - if ( (info->ChipFamily >= CHIP_FAMILY_R300) ) { + if ( (info->ChipFamily >= CHIP_FAMILY_R600) ) { + dri2_info.driverName = R600_DRIVER_NAME; + } else if ( (info->ChipFamily >= CHIP_FAMILY_R300) ) { dri2_info.driverName = R300_DRIVER_NAME; } else if ( info->ChipFamily >= CHIP_FAMILY_R200 ) { dri2_info.driverName = R200_DRIVER_NAME; diff --git a/src/radeon_exa.c b/src/radeon_exa.c index 3f3c9ba5..56e87a98 100644 --- a/src/radeon_exa.c +++ b/src/radeon_exa.c @@ -336,7 +336,7 @@ static void RADEONFinishAccess_BE(PixmapPtr pPix, int index) #endif /* X_BYTE_ORDER == X_BIG_ENDIAN */ #ifdef XF86DRM_MODE -static Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index) +Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index) { ScrnInfoPtr pScrn = xf86Screens[pPix->drawable.pScreen->myNum]; struct radeon_exa_pixmap_priv *driver_priv; @@ -364,7 +364,7 @@ static Bool RADEONPrepareAccess_CS(PixmapPtr pPix, int index) return TRUE; } -static void RADEONFinishAccess_CS(PixmapPtr pPix, int index) +void RADEONFinishAccess_CS(PixmapPtr pPix, int index) { struct radeon_exa_pixmap_priv *driver_priv; @@ -456,7 +456,7 @@ void *RADEONEXACreatePixmap2(ScreenPtr pScreen, int width, int height, return new_priv; } -static void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv) +void RADEONEXADestroyPixmap(ScreenPtr pScreen, void *driverPriv) { struct radeon_exa_pixmap_priv *driver_priv = driverPriv; @@ -489,7 +489,7 @@ void radeon_set_pixmap_bo(PixmapPtr pPix, struct radeon_bo *bo) } } -static Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix) +Bool RADEONEXAPixmapIsOffscreen(PixmapPtr pPix) { struct radeon_exa_pixmap_priv *driver_priv; -- cgit v1.2.3