diff options
author | Matthieu Herrb <matthieu@cvs.openbsd.org> | 2009-08-25 18:51:46 +0000 |
---|---|---|
committer | Matthieu Herrb <matthieu@cvs.openbsd.org> | 2009-08-25 18:51:46 +0000 |
commit | 269f87e08fd080a8849c7bbadaedbf53ba1186c3 (patch) | |
tree | a30cd6e744358b3167711a0f9b63963b82914266 /driver/xf86-video-ati/src/r600_textured_videofuncs.c | |
parent | 94721b7febf9ff5092d1d32a3e378ceaaf294b89 (diff) |
update to xf86-video-ati 6.12.2
This has been in snapshots for weeks. ok oga@, todd@.
Diffstat (limited to 'driver/xf86-video-ati/src/r600_textured_videofuncs.c')
-rw-r--r-- | driver/xf86-video-ati/src/r600_textured_videofuncs.c | 518 |
1 files changed, 518 insertions, 0 deletions
diff --git a/driver/xf86-video-ati/src/r600_textured_videofuncs.c b/driver/xf86-video-ati/src/r600_textured_videofuncs.c new file mode 100644 index 000000000..88745d5c0 --- /dev/null +++ b/driver/xf86-video-ati/src/r600_textured_videofuncs.c @@ -0,0 +1,518 @@ +/* + * Copyright 2008 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Author: Alex Deucher <alexander.deucher@amd.com> + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "xf86.h" + +#include "exa.h" + +#include "radeon.h" +#include "radeon_reg.h" +#include "r600_shader.h" +#include "r600_reg.h" +#include "r600_state.h" + +#include "radeon_video.h" + +#include <X11/extensions/Xv.h> +#include "fourcc.h" + +#include "damage.h" + +static void +R600DoneTexturedVideo(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + draw_config_t draw_conf; + vtx_resource_t vtx_res; + + CLEAR (draw_conf); + CLEAR (vtx_res); + + if (accel_state->vb_index == 0) { + R600IBDiscard(pScrn, accel_state->ib); + return; + } + + accel_state->vb_mc_addr = info->gartLocation + info->dri->bufStart + + (accel_state->ib->idx * accel_state->ib->total) + (accel_state->ib->total / 2); + accel_state->vb_size = accel_state->vb_index * 16; + + /* flush vertex cache */ + if ((info->ChipFamily == CHIP_FAMILY_RV610) || + (info->ChipFamily == CHIP_FAMILY_RV620) || + (info->ChipFamily == CHIP_FAMILY_RS780) || + (info->ChipFamily == CHIP_FAMILY_RV710)) + cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, + accel_state->vb_size, accel_state->vb_mc_addr); + else + cp_set_surface_sync(pScrn, accel_state->ib, VC_ACTION_ENA_bit, + accel_state->vb_size, accel_state->vb_mc_addr); + + /* Vertex buffer setup */ + vtx_res.id = SQ_VTX_RESOURCE_vs; + vtx_res.vtx_size_dw = 16 / 4; + vtx_res.vtx_num_entries = accel_state->vb_size / 4; + vtx_res.mem_req_size = 1; + vtx_res.vb_addr = accel_state->vb_mc_addr; + set_vtx_resource (pScrn, accel_state->ib, &vtx_res); + + draw_conf.prim_type = DI_PT_RECTLIST; + draw_conf.vgt_draw_initiator = DI_SRC_SEL_AUTO_INDEX; + draw_conf.num_instances = 1; + draw_conf.num_indices = vtx_res.vtx_num_entries / vtx_res.vtx_size_dw; + draw_conf.index_type = DI_INDEX_SIZE_16_BIT; + + draw_auto(pScrn, accel_state->ib, &draw_conf); + + 
wait_3d_idle_clean(pScrn, accel_state->ib); + + /* sync destination surface */ + cp_set_surface_sync(pScrn, accel_state->ib, (CB_ACTION_ENA_bit | CB0_DEST_BASE_ENA_bit), + accel_state->dst_size, accel_state->dst_mc_addr); + + R600CPFlushIndirect(pScrn, accel_state->ib); +} + +void +R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + struct radeon_accel_state *accel_state = info->accel_state; + PixmapPtr pPixmap = pPriv->pPixmap; + BoxPtr pBox = REGION_RECTS(&pPriv->clip); + int nBox = REGION_NUM_RECTS(&pPriv->clip); + int dstxoff, dstyoff; + cb_config_t cb_conf; + tex_resource_t tex_res; + tex_sampler_t tex_samp; + shader_config_t vs_conf, ps_conf; + int uv_offset; + static float ps_alu_consts[] = { + 1.0, 0.0, 1.4020, 0, /* r - c[0] */ + 1.0, -0.34414, -0.71414, 0, /* g - c[1] */ + 1.0, 1.7720, 0.0, 0, /* b - c[2] */ + /* Constants for undoing Y'CbCr scaling + * - Y' is scaled from 16:235 + * - Cb/Cr are scaled from 16:240 + * Unscaled value N' = N * N_mul + N_shift (N' in range [-0.5, 0.5]) + * Vector is [Y_mul, Y_shfit, C_mul, C_shift] + */ + 256.0/219.0, -16.0/219.0, 256.0/224.0, -128.0/224.0, + }; + + CLEAR (cb_conf); + CLEAR (tex_res); + CLEAR (tex_samp); + CLEAR (vs_conf); + CLEAR (ps_conf); + + accel_state->dst_pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8); + accel_state->src_pitch[0] = pPriv->src_pitch; + + /* bad pitch */ + if (accel_state->src_pitch[0] & 7) + return; + if (accel_state->dst_pitch & 7) + return; + +#ifdef COMPOSITE + dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; + dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; +#else + dstxoff = 0; + dstyoff = 0; +#endif + + accel_state->ib = RADEONCPGetBuffer(pScrn); + + /* Init */ + start_3d(pScrn, accel_state->ib); + + set_default_state(pScrn, accel_state->ib); + + /* Scissor / viewport */ + EREG(accel_state->ib, PA_CL_VTE_CNTL, VTX_XY_FMT_bit); + EREG(accel_state->ib, PA_CL_CLIP_CNTL, 
CLIP_DISABLE_bit); + + accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->xv_vs_offset; + + accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + + accel_state->xv_ps_offset; + + /* PS bool constant */ + switch(pPriv->id) { + case FOURCC_YV12: + case FOURCC_I420: + set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0)); + break; + case FOURCC_UYVY: + case FOURCC_YUY2: + default: + set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0)); + break; + } + + accel_state->vs_size = 512; + accel_state->ps_size = 512; + + /* Shader */ + + /* flush SQ cache */ + cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, + accel_state->vs_size, accel_state->vs_mc_addr); + + vs_conf.shader_addr = accel_state->vs_mc_addr; + vs_conf.num_gprs = 2; + vs_conf.stack_size = 0; + vs_setup (pScrn, accel_state->ib, &vs_conf); + + /* flush SQ cache */ + cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, + accel_state->ps_size, accel_state->ps_mc_addr); + + ps_conf.shader_addr = accel_state->ps_mc_addr; + ps_conf.num_gprs = 3; + ps_conf.stack_size = 1; + ps_conf.uncached_first_inst = 1; + ps_conf.clamp_consts = 0; + ps_conf.export_mode = 2; + ps_setup (pScrn, accel_state->ib, &ps_conf); + + /* PS alu constants */ + set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps, + sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); + + /* Texture */ + switch(pPriv->id) { + case FOURCC_YV12: + case FOURCC_I420: + accel_state->src_mc_addr[0] = pPriv->src_offset; + accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; + + /* flush texture cache */ + cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], + accel_state->src_mc_addr[0]); + + /* Y texture */ + tex_res.id = 0; + tex_res.w = pPriv->w; + tex_res.h = pPriv->h; + tex_res.pitch = accel_state->src_pitch[0]; + tex_res.depth = 0; + tex_res.dim = 
SQ_TEX_DIM_2D; + tex_res.base = accel_state->src_mc_addr[0]; + tex_res.mip_base = accel_state->src_mc_addr[0]; + + tex_res.format = FMT_8; + tex_res.dst_sel_x = SQ_SEL_X; /* Y */ + tex_res.dst_sel_y = SQ_SEL_1; + tex_res.dst_sel_z = SQ_SEL_1; + tex_res.dst_sel_w = SQ_SEL_1; + + tex_res.request_size = 1; + tex_res.base_level = 0; + tex_res.last_level = 0; + tex_res.perf_modulation = 0; + tex_res.interlaced = 0; + set_tex_resource (pScrn, accel_state->ib, &tex_res); + + /* Y sampler */ + tex_samp.id = 0; + tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_z = SQ_TEX_WRAP; + + /* xxx: switch to bicubic */ + tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; + tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; + + tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; + tex_samp.mip_filter = 0; /* no mipmap */ + set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + + /* U or V texture */ + uv_offset = accel_state->src_pitch[0] * pPriv->h; + uv_offset = (uv_offset + 255) & ~255; + + cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, + accel_state->src_size[0] / 4, + accel_state->src_mc_addr[0] + uv_offset); + + tex_res.id = 1; + tex_res.format = FMT_8; + tex_res.w = pPriv->w >> 1; + tex_res.h = pPriv->h >> 1; + tex_res.pitch = accel_state->src_pitch[0] >> 1; + tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ + tex_res.dst_sel_y = SQ_SEL_1; + tex_res.dst_sel_z = SQ_SEL_1; + tex_res.dst_sel_w = SQ_SEL_1; + tex_res.interlaced = 0; + + tex_res.base = accel_state->src_mc_addr[0] + uv_offset; + tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset; + set_tex_resource (pScrn, accel_state->ib, &tex_res); + + /* U or V sampler */ + tex_samp.id = 1; + set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + + /* U or V texture */ + uv_offset += ((accel_state->src_pitch[0] >> 1) * (pPriv->h >> 1)); + uv_offset = (uv_offset + 255) & ~255; + + cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, + 
accel_state->src_size[0] / 4, + accel_state->src_mc_addr[0] + uv_offset); + + tex_res.id = 2; + tex_res.format = FMT_8; + tex_res.w = pPriv->w >> 1; + tex_res.h = pPriv->h >> 1; + tex_res.pitch = accel_state->src_pitch[0] >> 1; + tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ + tex_res.dst_sel_y = SQ_SEL_1; + tex_res.dst_sel_z = SQ_SEL_1; + tex_res.dst_sel_w = SQ_SEL_1; + tex_res.interlaced = 0; + + tex_res.base = accel_state->src_mc_addr[0] + uv_offset; + tex_res.mip_base = accel_state->src_mc_addr[0] + uv_offset; + set_tex_resource (pScrn, accel_state->ib, &tex_res); + + /* UV sampler */ + tex_samp.id = 2; + set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + break; + case FOURCC_UYVY: + case FOURCC_YUY2: + default: + accel_state->src_mc_addr[0] = pPriv->src_offset; + accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; + + /* flush texture cache */ + cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], + accel_state->src_mc_addr[0]); + + /* Y texture */ + tex_res.id = 0; + tex_res.w = pPriv->w; + tex_res.h = pPriv->h; + tex_res.pitch = accel_state->src_pitch[0] >> 1; + tex_res.depth = 0; + tex_res.dim = SQ_TEX_DIM_2D; + tex_res.base = accel_state->src_mc_addr[0]; + tex_res.mip_base = accel_state->src_mc_addr[0]; + + tex_res.format = FMT_8_8; + if (pPriv->id == FOURCC_UYVY) + tex_res.dst_sel_x = SQ_SEL_Y; /* Y */ + else + tex_res.dst_sel_x = SQ_SEL_X; /* Y */ + tex_res.dst_sel_y = SQ_SEL_1; + tex_res.dst_sel_z = SQ_SEL_1; + tex_res.dst_sel_w = SQ_SEL_1; + + tex_res.request_size = 1; + tex_res.base_level = 0; + tex_res.last_level = 0; + tex_res.perf_modulation = 0; + tex_res.interlaced = 0; + set_tex_resource (pScrn, accel_state->ib, &tex_res); + + /* Y sampler */ + tex_samp.id = 0; + tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; + tex_samp.clamp_z = SQ_TEX_WRAP; + + /* xxx: switch to bicubic */ + tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; + tex_samp.xy_min_filter = 
SQ_TEX_XY_FILTER_BILINEAR; + + tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; + tex_samp.mip_filter = 0; /* no mipmap */ + set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + + /* UV texture */ + tex_res.id = 1; + tex_res.format = FMT_8_8_8_8; + tex_res.w = pPriv->w >> 1; + tex_res.h = pPriv->h; + tex_res.pitch = accel_state->src_pitch[0] >> 2; + if (pPriv->id == FOURCC_UYVY) { + tex_res.dst_sel_x = SQ_SEL_X; /* V */ + tex_res.dst_sel_y = SQ_SEL_Z; /* U */ + } else { + tex_res.dst_sel_x = SQ_SEL_Y; /* V */ + tex_res.dst_sel_y = SQ_SEL_W; /* U */ + } + tex_res.dst_sel_z = SQ_SEL_1; + tex_res.dst_sel_w = SQ_SEL_1; + tex_res.interlaced = 0; + + tex_res.base = accel_state->src_mc_addr[0]; + tex_res.mip_base = accel_state->src_mc_addr[0]; + set_tex_resource (pScrn, accel_state->ib, &tex_res); + + /* UV sampler */ + tex_samp.id = 1; + set_tex_sampler (pScrn, accel_state->ib, &tex_samp); + break; + } + + /* Render setup */ + EREG(accel_state->ib, CB_SHADER_MASK, (0x0f << OUTPUT0_ENABLE_shift)); + EREG(accel_state->ib, R7xx_CB_SHADER_CONTROL, (RT0_ENABLE_bit)); + EREG(accel_state->ib, CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */ + + cb_conf.id = 0; + + accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; + + cb_conf.w = accel_state->dst_pitch; + cb_conf.h = pPixmap->drawable.height; + cb_conf.base = accel_state->dst_mc_addr; + + switch (pPixmap->drawable.bitsPerPixel) { + case 16: + if (pPixmap->drawable.depth == 15) { + cb_conf.format = COLOR_1_5_5_5; + cb_conf.comp_swap = 1; /* ARGB */ + } else { + cb_conf.format = COLOR_5_6_5; + cb_conf.comp_swap = 2; /* RGB */ + } + break; + case 32: + cb_conf.format = COLOR_8_8_8_8; + cb_conf.comp_swap = 1; /* ARGB */ + break; + default: + return; + } + + cb_conf.source_format = 1; + cb_conf.blend_clamp = 1; + set_render_target(pScrn, accel_state->ib, &cb_conf); + + EREG(accel_state->ib, PA_SU_SC_MODE_CNTL, (FACE_bit | + (POLYMODE_PTYPE__TRIANGLES << POLYMODE_FRONT_PTYPE_shift) | + 
(POLYMODE_PTYPE__TRIANGLES << POLYMODE_BACK_PTYPE_shift))); + EREG(accel_state->ib, DB_SHADER_CONTROL, ((1 << Z_ORDER_shift) | /* EARLY_Z_THEN_LATE_Z */ + DUAL_EXPORT_ENABLE_bit)); /* Only useful if no depth export */ + + /* Interpolator setup */ + /* export tex coords from VS */ + EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); + EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); + + /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x + * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ + EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift))); + EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0); + EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | + (0x03 << DEFAULT_VAL_shift) | + SEL_CENTROID_bit)); + EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); + + + if (pPriv->vsync) { + xf86CrtcPtr crtc = radeon_xv_pick_best_crtc(pScrn, + pPriv->drw_x, + pPriv->drw_x + pPriv->dst_w, + pPriv->drw_y, + pPriv->drw_y + pPriv->dst_h); + if (crtc) { + RADEONCrtcPrivatePtr radeon_crtc = crtc->driver_private; + + cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap, + radeon_crtc->crtc_id, + pPriv->drw_y - crtc->y, + (pPriv->drw_y - crtc->y) + pPriv->dst_h); + } + } + + accel_state->vb_index = 0; + + while (nBox--) { + int srcX, srcY, srcw, srch; + int dstX, dstY, dstw, dsth; + float *vb; + + if (((accel_state->vb_index + 3) * 16) > (accel_state->ib->total / 2)) { + R600DoneTexturedVideo(pScrn); + accel_state->vb_index = 0; + accel_state->ib = RADEONCPGetBuffer(pScrn); + } + + vb = (pointer)((char*)accel_state->ib->address + + (accel_state->ib->total / 2) + + accel_state->vb_index * 16); + + dstX = pBox->x1 + dstxoff; + dstY = pBox->y1 + dstyoff; + dstw = pBox->x2 - pBox->x1; + dsth = pBox->y2 - pBox->y1; + + srcX = ((pBox->x1 - pPriv->drw_x) * + pPriv->src_w) / pPriv->dst_w; + srcY = ((pBox->y1 - pPriv->drw_y) * + pPriv->src_h) / pPriv->dst_h; + + srcw = (pPriv->src_w * dstw) / 
pPriv->dst_w; + srch = (pPriv->src_h * dsth) / pPriv->dst_h; + + vb[0] = (float)dstX; + vb[1] = (float)dstY; + vb[2] = (float)srcX / pPriv->w; + vb[3] = (float)srcY / pPriv->h; + + vb[4] = (float)dstX; + vb[5] = (float)(dstY + dsth); + vb[6] = (float)srcX / pPriv->w; + vb[7] = (float)(srcY + srch) / pPriv->h; + + vb[8] = (float)(dstX + dstw); + vb[9] = (float)(dstY + dsth); + vb[10] = (float)(srcX + srcw) / pPriv->w; + vb[11] = (float)(srcY + srch) / pPriv->h; + + accel_state->vb_index += 3; + + pBox++; + } + + R600DoneTexturedVideo(pScrn); + + DamageDamageRegion(pPriv->pDraw, &pPriv->clip); +} |