diff options
author | Dave Airlie <airlied@redhat.com> | 2009-09-08 11:26:32 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2009-09-08 11:26:32 +1000 |
commit | 6990f2ac6478bf92929a4400ef84fb2142699204 (patch) | |
tree | d24f40c20909f81ac363d6f283ce8bd2a4fca8ea /src/r600_exa.c | |
parent | 917f2d7cd1815279710939edc27133ca6609460f (diff) | |
parent | 853f4c3d1ea8f975ab2855f18d3ae336a4095091 (diff) |
Merge branch 'r6xx-cs'
Diffstat (limited to 'src/r600_exa.c')
-rw-r--r-- | src/r600_exa.c | 602 |
1 files changed, 435 insertions, 167 deletions
diff --git a/src/r600_exa.c b/src/r600_exa.c index d2df1dbe..86da68c4 100644 --- a/src/r600_exa.c +++ b/src/r600_exa.c @@ -128,12 +128,22 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) uint32_t a, r, g, b; float ps_alu_consts[4]; + //return FALSE; + if (pPix->drawable.bitsPerPixel == 24) RADEON_FALLBACK(("24bpp unsupported\n")); if (!R600CheckBPP(pPix->drawable.bitsPerPixel)) RADEON_FALLBACK(("R600CheckDatatype failed\n")); - accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->dst_mc_addr = 0; + accel_state->dst_bo = radeon_get_pixmap_bo(pPix); + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; + } else +#endif + accel_state->dst_mc_addr = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; accel_state->dst_size = exaGetPixmapPitch(pPix) * pPix->drawable.height; accel_state->dst_pitch = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); @@ -156,26 +166,33 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) pPix->drawable.bitsPerPixel, exaGetPixmapPitch(pPix)); #endif - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); /* Init */ +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->XInited3D = FALSE; +#endif start_3d(pScrn, accel_state->ib); set_default_state(pScrn, accel_state->ib); - /* Scissor / viewport */ - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); set_screen_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); set_window_scissor(pScrn, accel_state->ib, 0, 0, pPix->drawable.width, pPix->drawable.height); - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->solid_vs_offset; - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->solid_ps_offset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->vs_mc_addr = accel_state->solid_vs_offset; + accel_state->ps_mc_addr = accel_state->solid_ps_offset; + } else +#endif + { + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->solid_vs_offset; + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->solid_ps_offset; + } accel_state->vs_size = 512; accel_state->ps_size = 512; @@ -183,16 +200,19 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 2; vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 1; @@ -200,6 +220,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); /* Render setup */ @@ -211,14 +232,16 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) pmask |= 1; /* R */ if (pm & 0xff000000) pmask |= 8; /* A */ + BEGIN_BATCH(6); EREG(accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); - EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[alu]); + END_BATCH(); cb_conf.id = 0; cb_conf.w = accel_state->dst_pitch; cb_conf.h = pPix->drawable.height; cb_conf.base = accel_state->dst_mc_addr; + cb_conf.bo = accel_state->dst_bo; if (pPix->drawable.bitsPerPixel == 8) { cb_conf.format = COLOR_8; @@ -234,14 +257,9 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) cb_conf.blend_clamp = 1; set_render_target(pScrn, accel_state->ib, &cb_conf); - EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); - EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ - DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ - /* Interpolator setup */ /* one unused export from VS (VS_EXPORT_COUNT is zero based, count minus one) */ + BEGIN_BATCH(18); EREG(accel_state->ib, SPI_VS_OUT_CONFIG, (0 << VS_EXPORT_COUNT_shift)); EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); @@ -256,6 +274,7 @@ R600PrepareSolid(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) FLAT_SHADE_bit | SEL_CENTROID_bit)); EREG(accel_state->ib, SPI_INTERP_CONTROL_0, FLAT_SHADE_ENA_bit); + END_BATCH(); /* PS alu constants */ if (pPix->drawable.bitsPerPixel == 16) { @@ -303,8 +322,7 @@ R600Solid(PixmapPtr pPix, int x1, int y1, int x2, int y2) if (((accel_state->vb_index + 3) * 8) > accel_state->vb_total) { R600DoneSolid(pPix); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*8); @@ -349,10 +367,12 @@ R600DoneSolid(PixmapPtr pPix) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -360,6 +380,7 @@ R600DoneSolid(PixmapPtr pPix) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); /* Draw */ @@ -375,15 +396,18 @@ R600DoneSolid(PixmapPtr pPix) /* sync dst surface */ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); R600CPFlushIndirect(pScrn, accel_state->ib); } static void R600DoPrepareCopy(ScrnInfoPtr pScrn, - int src_pitch, int src_width, int src_height, uint32_t src_offset, int src_bpp, - int dst_pitch, int dst_width, int dst_height, uint32_t dst_offset, int dst_bpp, + int src_pitch, int src_width, int src_height, + uint32_t src_offset, struct radeon_bo *src_bo, int src_bpp, + int dst_pitch, int dst_width, int dst_height, + uint32_t dst_offset, struct radeon_bo *dst_bo, int dst_bpp, int rop, Pixel planemask) { RADEONInfoPtr info = RADEONPTR(pScrn); @@ -400,26 +424,49 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, CLEAR (vs_conf); CLEAR (ps_conf); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8); + accel_state->src_mc_addr[0] = src_offset; + accel_state->src_pitch[0] = src_pitch; + accel_state->src_width[0] = src_width; + accel_state->src_height[0] = src_height; + accel_state->src_bpp[0] = src_bpp; + accel_state->src_bo[0] = src_bo; + accel_state->src_bo[1] = NULL; + + accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8); + accel_state->dst_mc_addr = dst_offset; + accel_state->dst_pitch = dst_pitch; + accel_state->dst_height = dst_height; + accel_state->dst_bpp = dst_bpp; + accel_state->dst_bo = dst_bo; + + r600_cp_start(pScrn); /* Init */ +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->XInited3D = FALSE; +#endif start_3d(pScrn, accel_state->ib); set_default_state(pScrn, accel_state->ib); - /* Scissor / viewport */ - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); set_screen_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); set_window_scissor(pScrn, accel_state->ib, 0, 0, dst_width, dst_height); - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->copy_vs_offset; - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->copy_ps_offset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->vs_mc_addr = accel_state->copy_vs_offset; + accel_state->ps_mc_addr = accel_state->copy_ps_offset; + } else +#endif + { + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->copy_vs_offset; + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->copy_ps_offset; + } accel_state->vs_size = 512; accel_state->ps_size = 512; @@ -427,16 +474,19 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 2; vs_conf.stack_size = 0; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 1; @@ -444,18 +494,13 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); - accel_state->src_size[0] = src_pitch * src_height * (src_bpp/8); - accel_state->src_mc_addr[0] = src_offset; - accel_state->src_pitch[0] = src_pitch; - accel_state->src_width[0] = src_width; - accel_state->src_height[0] = src_height; - accel_state->src_bpp[0] = src_bpp; - /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->src_size[0], accel_state->src_mc_addr[0]); + accel_state->src_size[0], accel_state->src_mc_addr[0], + accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM, 0); /* Texture */ tex_res.id = 0; @@ -466,6 +511,8 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, tex_res.dim = SQ_TEX_DIM_2D; tex_res.base = accel_state->src_mc_addr[0]; tex_res.mip_base = accel_state->src_mc_addr[0]; + tex_res.bo = accel_state->src_bo[0]; + tex_res.mip_bo = accel_state->src_bo[0]; if (src_bpp == 8) { tex_res.format = FMT_8; tex_res.dst_sel_x = SQ_SEL_1; /* R */ @@ -512,20 +559,16 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, pmask |= 1; /* R */ if (planemask & 0xff000000) pmask |= 8; /* A */ + BEGIN_BATCH(6); EREG(accel_state->ib, CB_SHADER_MASK, (pmask << OUTPUT0_ENABLE_shift)); - EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); EREG(accel_state->ib, CB_COLOR_CONTROL, RADEON_ROP[rop]); - - accel_state->dst_size = dst_pitch * dst_height * (dst_bpp/8); - accel_state->dst_mc_addr = dst_offset; - accel_state->dst_pitch = dst_pitch; - accel_state->dst_height = dst_height; - accel_state->dst_bpp = dst_bpp; + END_BATCH(); cb_conf.id = 0; cb_conf.w = accel_state->dst_pitch; cb_conf.h = dst_height; cb_conf.base = accel_state->dst_mc_addr; + cb_conf.bo = accel_state->dst_bo; if (dst_bpp == 8) { cb_conf.format = COLOR_8; cb_conf.comp_swap = 3; /* A */ @@ -540,14 +583,9 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, cb_conf.blend_clamp = 1; set_render_target(pScrn, accel_state->ib, &cb_conf); - EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); - EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ - DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ - /* Interpolator setup */ /* export tex coord from VS */ + BEGIN_BATCH(18); EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); @@ -561,6 +599,7 @@ R600DoPrepareCopy(ScrnInfoPtr pScrn, (0x01 << DEFAULT_VAL_shift) | SEL_CENTROID_bit)); EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); + END_BATCH(); } static void @@ -589,10 +628,12 @@ R600DoCopy(ScrnInfoPtr pScrn) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); /* Vertex buffer setup */ vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -600,6 +641,7 @@ R600DoCopy(ScrnInfoPtr pScrn) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; set_vtx_resource (pScrn, accel_state->ib, &vtx_res); draw_conf.prim_type = DI_PT_RECTLIST; @@ -614,7 +656,8 @@ R600DoCopy(ScrnInfoPtr pScrn) /* sync dst surface */ cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); R600CPFlushIndirect(pScrn, accel_state->ib); } @@ -631,8 +674,7 @@ R600AppendCopyVertex(ScrnInfoPtr pScrn, if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { R600DoCopy(pScrn); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); @@ -665,6 +707,8 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; + //return FALSE; + if (pSrc->drawable.bitsPerPixel == 24) RADEON_FALLBACK(("24bpp unsupported\n")); if (pDst->drawable.bitsPerPixel == 24) @@ -676,9 +720,25 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); accel_state->src_pitch[0] = exaGetPixmapPitch(pSrc) / (pSrc->drawable.bitsPerPixel / 8); - - accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; - accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + accel_state->same_surface = FALSE; + +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->src_mc_addr[0] = 0; + accel_state->dst_mc_addr = 0; + accel_state->src_bo[0] = radeon_get_pixmap_bo(pSrc); + accel_state->src_bo[1] = NULL; + accel_state->dst_bo = radeon_get_pixmap_bo(pDst); + if (accel_state->dst_bo == accel_state->src_bo[0]) + accel_state->same_surface = TRUE; + } else +#endif + { + accel_state->src_mc_addr[0] = exaGetPixmapOffset(pSrc) + info->fbLocation + pScrn->fbOffset; + accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) + accel_state->same_surface = TRUE; + } accel_state->src_width[0] = pSrc->drawable.width; accel_state->src_height[0] = pSrc->drawable.height; @@ -711,27 +771,48 @@ R600PrepareCopy(PixmapPtr pSrc, PixmapPtr pDst, accel_state->rop = rop; accel_state->planemask = planemask; - if (exaGetPixmapOffset(pSrc) == exaGetPixmapOffset(pDst)) { + if (accel_state->same_surface == TRUE) { unsigned long size = pDst->drawable.height * accel_state->dst_pitch * pDst->drawable.bitsPerPixel/8; - accel_state->same_surface = TRUE; - if (accel_state->copy_area) { - exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); - accel_state->copy_area = NULL; +#if defined(XF86DRM_MODE) + if (info->cs) { + if (accel_state->copy_area_bo) { + radeon_bo_unref(accel_state->copy_area_bo); + accel_state->copy_area_bo = NULL; + } + accel_state->copy_area_bo = radeon_bo_open(info->bufmgr, 0, size, 0, + RADEON_GEM_DOMAIN_VRAM, + 0); + if (accel_state->copy_area_bo == NULL) { + R600IBDiscard(pScrn, accel_state->ib); + return FALSE; + } + radeon_cs_space_add_persistent_bo(info->cs, accel_state->copy_area_bo, + 0, RADEON_GEM_DOMAIN_VRAM); + if (radeon_cs_space_check(info->cs)) { + radeon_bo_unref(accel_state->copy_area_bo); + accel_state->copy_area_bo = NULL; + R600IBDiscard(pScrn, accel_state->ib); + return FALSE; + } + accel_state->copy_area = (void*)accel_state->copy_area_bo; + } else +#endif + { + if (accel_state->copy_area) { + exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); + accel_state->copy_area = NULL; + } + accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL); } - accel_state->copy_area = exaOffscreenAlloc(pDst->drawable.pScreen, size, 256, TRUE, NULL, NULL); - } else { - accel_state->same_surface = FALSE; - + } else R600DoPrepareCopy(pScrn, accel_state->src_pitch[0], pSrc->drawable.width, pSrc->drawable.height, - accel_state->src_mc_addr[0], pSrc->drawable.bitsPerPixel, + accel_state->src_mc_addr[0], accel_state->src_bo[0], pSrc->drawable.bitsPerPixel, accel_state->dst_pitch, pDst->drawable.width, pDst->drawable.height, - accel_state->dst_mc_addr, pDst->drawable.bitsPerPixel, + accel_state->dst_mc_addr, accel_state->dst_bo, pDst->drawable.bitsPerPixel, rop, planemask); - } - return TRUE; } @@ -759,9 +840,20 @@ R600OverlapCopy(PixmapPtr pDst, uint32_t dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); uint32_t dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; int i, hchunk, vchunk; + struct radeon_bo *dst_bo = NULL; + +#if defined(XF86DRM_MODE) + if (info->cs) { + dst_offset = 0; + dst_bo = radeon_get_pixmap_bo(pDst); + radeon_cs_space_add_persistent_bo(info->cs, dst_bo, + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_check(info->cs); + } +#endif - if (is_overlap(srcX, srcX + w, srcY, srcY + h, - dstX, dstX + w, dstY, dstY + h)) { + if (is_overlap(srcX, srcX + (w - 1), srcY, srcY + (h - 1), + dstX, dstX + (w - 1), dstY, dstY + (h - 1))) { /* Calculate height/width of non-overlapping area */ hchunk = (srcX < dstX) ? (dstX - srcX) : (srcX - dstX); vchunk = (srcY < dstY) ? (dstY - srcY) : (srcY - dstY); @@ -774,8 +866,10 @@ R600OverlapCopy(PixmapPtr pDst, if ((w / hchunk) <= (h / vchunk)) { /* reduce to horizontal */ if (srcY > dstY ) { /* diagonal up */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, vchunk); R600DoCopy(pScrn); @@ -784,8 +878,10 @@ R600OverlapCopy(PixmapPtr pDst, dstY = dstY + vchunk; } else { /* diagonal down */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY + h - vchunk, dstX, dstY + h - vchunk, w, vchunk); R600DoCopy(pScrn); @@ -795,8 +891,10 @@ R600OverlapCopy(PixmapPtr pDst, } else { /* reduce to vertical */ if (srcX > dstX ) { /* diagonal left */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, hchunk, h); R600DoCopy(pScrn); @@ -805,8 +903,10 @@ R600OverlapCopy(PixmapPtr pDst, dstX = dstX + hchunk; } else { /* diagonal right */ R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX + w - hchunk, srcY, dstX + w - hchunk, dstY, hchunk, h); R600DoCopy(pScrn); @@ -821,8 +921,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy right to left */ for (i = w; i > 0; i -= hchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX + i - hchunk, srcY, dstX + i - hchunk, dstY, hchunk, h); R600DoCopy(pScrn); @@ -831,8 +933,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy left to right */ for (i = 0; i < w; i += hchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX + i, srcY, dstX + i, dstY, hchunk, h); @@ -844,8 +948,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy top to bottom */ for (i = 0; i < h; i += vchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); if (vchunk > h - i) vchunk = h - i; @@ -856,8 +962,10 @@ R600OverlapCopy(PixmapPtr pDst, /* copy bottom to top */ for (i = h; i > 0; i -= vchunk) { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); if (vchunk > i) vchunk = i; @@ -868,8 +976,10 @@ R600OverlapCopy(PixmapPtr pDst, } } else { R600DoPrepareCopy(pScrn, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, - dst_pitch, pDst->drawable.width, pDst->drawable.height, dst_offset, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, + dst_pitch, pDst->drawable.width, pDst->drawable.height, + dst_offset, dst_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); @@ -886,27 +996,46 @@ R600Copy(PixmapPtr pDst, ScrnInfoPtr pScrn = xf86Screens[pDst->drawable.pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; + struct radeon_bo *bo = NULL; if (accel_state->same_surface && (srcX == dstX) && (srcY == dstY)) return; - if (accel_state->same_surface && is_overlap(srcX, srcX + w, srcY, srcY + h, dstX, dstX + w, dstY, dstY + h)) { +#if defined(XF86DRM_MODE) + if (info->cs) + bo = radeon_get_pixmap_bo(pDst); +#endif + + if (accel_state->same_surface && + is_overlap(srcX, srcX + (w - 1), srcY, srcY + (h - 1), + dstX, dstX + (w - 1), dstY, dstY + (h - 1))) { if (accel_state->copy_area) { uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); uint32_t orig_offset, tmp_offset; - tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset; - orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; - +#if defined(XF86DRM_MODE) + if (info->cs) { + tmp_offset = 0; + orig_offset = 0; + } else +#endif + { + tmp_offset = accel_state->copy_area->offset + info->fbLocation + pScrn->fbOffset; + orig_offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + } R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, - pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + orig_offset, bo, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + tmp_offset, accel_state->copy_area_bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); R600DoCopy(pScrn); R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, tmp_offset, pDst->drawable.bitsPerPixel, - pitch, pDst->drawable.width, pDst->drawable.height, orig_offset, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + tmp_offset, accel_state->copy_area_bo, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + orig_offset, bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, dstX, dstY, dstX, dstY, w, h); R600DoCopy(pScrn); @@ -914,11 +1043,20 @@ R600Copy(PixmapPtr pDst, R600OverlapCopy(pDst, srcX, srcY, dstX, dstY, w, h); } else if (accel_state->same_surface) { uint32_t pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); - uint32_t offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; + uint32_t offset; + +#if defined(XF86DRM_MODE) + if (info->cs) + offset = 0; + else +#endif + offset = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; R600DoPrepareCopy(pScrn, - pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, - pitch, pDst->drawable.width, pDst->drawable.height, offset, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + offset, bo, pDst->drawable.bitsPerPixel, + pitch, pDst->drawable.width, pDst->drawable.height, + offset, bo, pDst->drawable.bitsPerPixel, accel_state->rop, accel_state->planemask); R600AppendCopyVertex(pScrn, srcX, srcY, dstX, dstY, w, h); R600DoCopy(pScrn); @@ -939,7 +1077,8 @@ R600DoneCopy(PixmapPtr pDst) R600DoCopy(pScrn); if (accel_state->copy_area) { - exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); + if (!info->cs) + exaOffscreenFree(pDst->drawable.pScreen, accel_state->copy_area); accel_state->copy_area = NULL; } @@ -1119,7 +1258,16 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, CLEAR (tex_res); CLEAR (tex_samp); - accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->src_mc_addr[unit] = 0; + accel_state->src_bo[unit] = radeon_get_pixmap_bo(pPix); + radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_bo[unit], + RADEON_GEM_DOMAIN_VRAM, 0); + radeon_cs_space_check(info->cs); + } else +#endif + accel_state->src_mc_addr[unit] = exaGetPixmapOffset(pPix) + info->fbLocation + pScrn->fbOffset; accel_state->src_pitch[unit] = exaGetPixmapPitch(pPix) / (pPix->drawable.bitsPerPixel / 8); accel_state->src_size[unit] = exaGetPixmapPitch(pPix) * pPix->drawable.height; @@ -1138,7 +1286,8 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->src_size[unit], accel_state->src_mc_addr[unit]); + accel_state->src_size[unit], accel_state->src_mc_addr[unit], + accel_state->src_bo[unit], RADEON_GEM_DOMAIN_VRAM, 0); /* Texture */ tex_res.id = unit; @@ -1150,6 +1299,8 @@ static Bool R600TextureSetup(PicturePtr pPict, PixmapPtr pPix, tex_res.base = accel_state->src_mc_addr[unit]; tex_res.mip_base = accel_state->src_mc_addr[unit]; tex_res.format = R600TexFormats[i].card_fmt; + tex_res.bo = accel_state->src_bo[unit]; + tex_res.mip_bo = accel_state->src_bo[unit]; tex_res.request_size = 1; /* component swizzles */ @@ -1418,8 +1569,12 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, cb_config_t cb_conf; shader_config_t vs_conf, ps_conf; + //return FALSE; /* return FALSE; */ + if (pDst->drawable.bitsPerPixel < 8 || pSrc->drawable.bitsPerPixel < 8) + return FALSE; + if (pMask) { accel_state->msk_pic = pMaskPicture; if (pMaskPicture->componentAlpha) { @@ -1438,7 +1593,15 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, accel_state->src_alpha = FALSE; } - accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; +#if defined(XF86DRM_MODE) + if (info->cs) { + accel_state->dst_mc_addr = 0; + accel_state->dst_bo = radeon_get_pixmap_bo(pDst); + accel_state->src_bo[0] = NULL; + accel_state->src_bo[1] = NULL; + } else +#endif + accel_state->dst_mc_addr = exaGetPixmapOffset(pDst) + info->fbLocation + pScrn->fbOffset; accel_state->dst_pitch = exaGetPixmapPitch(pDst) / (pDst->drawable.bitsPerPixel / 8); accel_state->dst_size = exaGetPixmapPitch(pDst) * pDst->drawable.height; @@ -1455,18 +1618,17 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, CLEAR (vs_conf); CLEAR (ps_conf); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); /* Init */ +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->XInited3D = FALSE; +#endif start_3d(pScrn, accel_state->ib); set_default_state(pScrn, accel_state->ib); - /* Scissor / viewport */ - EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); - EREG(accel_state->ib, PA_CL_CLIP_CNTL, CLIP_DISABLE_bit); - set_generic_scissor(pScrn, accel_state->ib, 0, 0, pDst->drawable.width, pDst->drawable.height); set_screen_scissor(pScrn, accel_state->ib, 0, 0, pDst->drawable.width, pDst->drawable.height); set_window_scissor(pScrn, accel_state->ib, 0, 0, pDst->drawable.width, pDst->drawable.height); @@ -1488,16 +1650,31 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, if (pMask) { set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (1 << 0)); - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->comp_mask_ps_offset; +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->ps_mc_addr = accel_state->comp_mask_ps_offset; + else +#endif + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->comp_mask_ps_offset; } else { set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_vs, (0 << 0)); - accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->ps_mc_addr = accel_state->comp_ps_offset; + else +#endif + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + accel_state->comp_ps_offset; } - accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + - accel_state->comp_vs_offset; +#if defined(XF86DRM_MODE) + if (info->cs) + accel_state->vs_mc_addr = accel_state->comp_vs_offset; + else +#endif + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->comp_vs_offset; accel_state->vs_size = 512; accel_state->ps_size = 512; @@ -1506,16 +1683,19 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->vs_size, accel_state->vs_mc_addr); + accel_state->vs_size, accel_state->vs_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 3; vs_conf.stack_size = 1; + vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, - accel_state->ps_size, accel_state->ps_mc_addr); + accel_state->ps_size, accel_state->ps_mc_addr, + accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 3; @@ -1523,10 +1703,11 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; + ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf); + BEGIN_BATCH(9); EREG(accel_state->ib, CB_SHADER_MASK, (0xf << OUTPUT0_ENABLE_shift)); - EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); blendcntl = R600GetBlendCntl(op, pMaskPicture, pDstPicture->format); @@ -1540,12 +1721,14 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, PER_MRT_BLEND_bit)); EREG(accel_state->ib, CB_BLEND0_CONTROL, blendcntl); } + END_BATCH(); cb_conf.id = 0; cb_conf.w = accel_state->dst_pitch; cb_conf.h = pDst->drawable.height; cb_conf.base = accel_state->dst_mc_addr; cb_conf.format = dst_format; + cb_conf.bo = accel_state->dst_bo; switch (pDstPicture->format) { case PICT_a8r8g8b8: @@ -1566,13 +1749,8 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, cb_conf.blend_clamp = 1; set_render_target(pScrn, accel_state->ib, &cb_conf); - EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | - (POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); - EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ - DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ - /* Interpolator setup */ + BEGIN_BATCH(21); if (pMask) { /* export 2 tex coords from VS */ EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((2 - 1) << VS_EXPORT_COUNT_shift)); @@ -1599,6 +1777,7 @@ static Bool R600PrepareComposite(int op, PicturePtr pSrcPicture, (0x01 << DEFAULT_VAL_shift) | SEL_CENTROID_bit)); EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); + END_BATCH(); return TRUE; } @@ -1632,8 +1811,7 @@ static void R600Composite(PixmapPtr pDst, if (((accel_state->vb_index + 3) * 24) > accel_state->vb_total) { R600DoneComposite(pDst); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*24); @@ -1671,8 +1849,7 @@ static void R600Composite(PixmapPtr pDst, } else { if (((accel_state->vb_index + 3) * 16) > accel_state->vb_total) { R600DoneComposite(pDst); - accel_state->ib = RADEONCPGetBuffer(pScrn); - r600_vb_get(pScrn); + r600_cp_start(pScrn); } vb = (pointer)((char*)accel_state->vb_ptr+accel_state->vb_index*16); @@ -1722,6 +1899,7 @@ static void R600DoneComposite(PixmapPtr pDst) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; } else { accel_state->vb_size = accel_state->vb_index * 16; vtx_res.id = SQ_VTX_RESOURCE_vs; @@ -1729,6 +1907,7 @@ static void R600DoneComposite(PixmapPtr pDst) vtx_res.vtx_num_entries = accel_state->vb_size / 4; vtx_res.mem_req_size = 1; vtx_res.vb_addr = accel_state->vb_mc_addr; + vtx_res.bo = accel_state->vb_bo; } /* flush vertex cache */ if ((info->ChipFamily == CHIP_FAMILY_RV610) || @@ -1737,12 +1916,14 @@ static void R600DoneComposite(PixmapPtr pDst) (info->ChipFamily == CHIP_FAMILY_RS880) || (info->ChipFamily == CHIP_FAMILY_RV710)) cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); else cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, - accel_state->vb_size, accel_state->vb_mc_addr); + accel_state->vb_size, accel_state->vb_mc_addr, + accel_state->vb_bo, RADEON_GEM_DOMAIN_GTT, 0); - set_vtx_resource (pScrn, accel_state->ib, &vtx_res); + set_vtx_resource(pScrn, accel_state->ib, &vtx_res); draw_conf.prim_type = DI_PT_RECTLIST; draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; @@ -1755,9 +1936,11 @@ static void R600DoneComposite(PixmapPtr pDst) wait_3d_idle_clean(pScrn, accel_state->ib); cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), - accel_state->dst_size, accel_state->dst_mc_addr); + accel_state->dst_size, accel_state->dst_mc_addr, + accel_state->dst_bo, RADEON_GEM_DOMAIN_VRAM, 0); R600CPFlushIndirect(pScrn, accel_state->ib); + } Bool @@ -1774,6 +1957,7 @@ R600CopyToVRAM(ScrnInfoPtr pScrn, int scratch_offset = 0, hpass, temph; char *dst; drmBufPtr scratch; + struct radeon_bo *bo = NULL; if (dst_pitch & 7) return FALSE; @@ -1816,8 +2000,10 @@ R600CopyToVRAM(ScrnInfoPtr pScrn, } /* blit from scratch to vram */ R600DoPrepareCopy(pScrn, - scratch_pitch, w, oldhpass, offset, bpp, - dst_pitch, dst_width, dst_height, dst_mc_addr, bpp, + scratch_pitch, w, oldhpass, + offset, bo, bpp, + dst_pitch, dst_width, dst_height, + dst_mc_addr, bo, bpp, 3, 0xffffffff); R600AppendCopyVertex(pScrn, 0, 0, x, y, w, oldhpass); R600DoCopy(pScrn); @@ -1863,6 +2049,7 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, uint32_t scratch_pitch = scratch_pitch_bytes / (bpp / 8); int wpass = w * (bpp/8); drmBufPtr scratch; + struct radeon_bo *bo = NULL; /* RV740 seems to be particularly problematic with small xfers */ if ((info->ChipFamily == CHIP_FAMILY_RV740) && (w < 32 || h < 32)) @@ -1880,8 +2067,10 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, /* blit from vram to scratch */ R600DoPrepareCopy(pScrn, - src_pitch, src_width, src_height, src_mc_addr, bpp, - scratch_pitch, src_width, hpass, scratch_mc_addr, bpp, + src_pitch, src_width, src_height, + src_mc_addr, bo, bpp, + scratch_pitch, src_width, hpass, + scratch_mc_addr, bo, bpp, 3, 0xffffffff); R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); R600DoCopy(pScrn); @@ -1897,8 +2086,10 @@ R600DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, scratch_offset = scratch->total/2 - scratch_offset; /* blit from vram to scratch */ R600DoPrepareCopy(pScrn, - src_pitch, src_width, src_height, src_mc_addr, bpp, - scratch_pitch, src_width, hpass, scratch_mc_addr + scratch_offset, bpp, + src_pitch, src_width, src_height, + src_mc_addr, bo, bpp, + scratch_pitch, src_width, hpass, + scratch_mc_addr + scratch_offset, bo, bpp, 3, 0xffffffff); R600AppendCopyVertex(pScrn, x, y, 0, 0, w, hpass); R600DoCopy(pScrn); @@ -1940,7 +2131,12 @@ R600Sync(ScreenPtr pScreen, int marker) struct radeon_accel_state *accel_state = info->accel_state; if (accel_state->exaMarkerSynced != marker) { - RADEONWaitForIdleCP(pScrn); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (!info->cs) +#endif +#endif + RADEONWaitForIdleCP(pScrn); accel_state->exaMarkerSynced = marker; } @@ -1957,11 +2153,27 @@ R600AllocShaders(ScrnInfoPtr pScrn, ScreenPtr pScreen) accel_state->shaders = NULL; - accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256, - TRUE, NULL, NULL); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (info->cs) { + accel_state->shaders_bo = radeon_bo_open(info->bufmgr, 0, size, 0, + RADEON_GEM_DOMAIN_VRAM, 0); + if (accel_state->shaders_bo == NULL) { + ErrorF("Allocating shader failed\n"); + return FALSE; + } + return TRUE; + } else +#endif +#endif + { + accel_state->shaders = exaOffscreenAlloc(pScreen, size, 256, + TRUE, NULL, NULL); + + if (accel_state->shaders == NULL) + return FALSE; + } - if (accel_state->shaders == NULL) - return FALSE; return TRUE; } @@ -1972,8 +2184,21 @@ R600LoadShaders(ScrnInfoPtr pScrn) struct radeon_accel_state *accel_state = info->accel_state; RADEONChipFamily ChipSet = info->ChipFamily; uint32_t *shader; - - shader = (pointer)((char *)info->FB + accel_state->shaders->offset); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + int ret; + + if (info->cs) { + ret = radeon_bo_map(accel_state->shaders_bo, 1); + if (ret) { + FatalError("failed to map shader %d\n", ret); + return FALSE; + } + shader = accel_state->shaders_bo->ptr; + } else +#endif +#endif + shader = (pointer)((char *)info->FB + accel_state->shaders->offset); /* solid vs --------------------------------------- */ accel_state->solid_vs_offset = 0; @@ -2011,6 +2236,14 @@ R600LoadShaders(ScrnInfoPtr pScrn) accel_state->xv_ps_offset = 4096; R600_xv_ps(ChipSet, shader + accel_state->xv_ps_offset / 4); +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (info->cs) { + radeon_bo_unmap(accel_state->shaders_bo); + } +#endif +#endif + return TRUE; } @@ -2039,7 +2272,6 @@ R600FinishAccess(PixmapPtr pPix, int index) } - Bool R600DrawInit(ScreenPtr pScreen) { @@ -2065,19 +2297,45 @@ R600DrawInit(ScreenPtr pScreen) info->accel_state->exa->MarkSync = R600MarkSync; info->accel_state->exa->WaitMarker = R600Sync; - info->accel_state->exa->PrepareAccess = R600PrepareAccess; - info->accel_state->exa->FinishAccess = R600FinishAccess; - - /* AGP seems to have problems with gart transfers */ - if (info->accelDFS) { - info->accel_state->exa->UploadToScreen = R600UploadToScreen; - info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen; +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (info->cs) { + info->accel_state->exa->CreatePixmap = RADEONEXACreatePixmap; + info->accel_state->exa->DestroyPixmap = RADEONEXADestroyPixmap; + info->accel_state->exa->PixmapIsOffscreen = RADEONEXAPixmapIsOffscreen; + info->accel_state->exa->PrepareAccess = RADEONPrepareAccess_CS; + info->accel_state->exa->FinishAccess = RADEONFinishAccess_CS; + info->accel_state->exa->UploadToScreen = NULL; + info->accel_state->exa->DownloadFromScreen = NULL; + } else +#endif +#endif + { + info->accel_state->exa->PrepareAccess = R600PrepareAccess; + info->accel_state->exa->FinishAccess = R600FinishAccess; + + /* AGP seems to have problems with gart transfers */ + if (info->accelDFS) { + info->accel_state->exa->UploadToScreen = R600UploadToScreen; + info->accel_state->exa->DownloadFromScreen = R600DownloadFromScreen; + } } info->accel_state->exa->flags = EXA_OFFSCREEN_PIXMAPS; #ifdef EXA_SUPPORTS_PREPARE_AUX info->accel_state->exa->flags |= EXA_SUPPORTS_PREPARE_AUX; #endif + +#ifdef XF86DRM_MODE +#ifdef EXA_HANDLES_PIXMAPS + if (info->cs) { + info->accel_state->exa->flags |= EXA_HANDLES_PIXMAPS; +//#ifdef EXA_MIXED_PIXMAPS +// info->accel_state->exa->flags |= EXA_MIXED_PIXMAPS; +//#endif + } +#endif +#endif info->accel_state->exa->pixmapOffsetAlign = 256; info->accel_state->exa->pixmapPitchAlign = 256; @@ -2104,11 +2362,21 @@ R600DrawInit(ScreenPtr pScreen) return FALSE; } - if (!info->gartLocation) - return FALSE; +#ifdef XF86DRM_MODE +#if (EXA_VERSION_MAJOR == 2 && EXA_VERSION_MINOR >= 4) + if (!info->cs) +#endif +#endif + if (!info->gartLocation) + return FALSE; info->accel_state->XInited3D = FALSE; info->accel_state->copy_area = NULL; + info->accel_state->src_bo[0] = NULL; + info->accel_state->src_bo[1] = NULL; + info->accel_state->dst_bo = NULL; + info->accel_state->copy_area_bo = NULL; + info->accel_state->vb_bo = NULL; if (!R600AllocShaders(pScrn, pScreen)) return FALSE; |