/* * Copyright 2008 Advanced Micro Devices, Inc. * * Permission is hereby granted, free of charge, to any person obtaining a * copy of this software and associated documentation files (the "Software"), * to deal in the Software without restriction, including without limitation * the rights to use, copy, modify, merge, publish, distribute, sublicense, * and/or sell copies of the Software, and to permit persons to whom the * Software is furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. * * Author: Alex Deucher * */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include "xf86.h" #include "exa.h" #include "radeon.h" #include "radeon_reg.h" #include "r600_shader.h" #include "r600_reg.h" #include "r600_state.h" #include "radeon_video.h" #include #include "fourcc.h" #include "damage.h" #include "radeon_vbo.h" /* Parameters for ITU-R BT.601 and ITU-R BT.709 colour spaces note the difference to the parameters used in overlay are due to 10bit vs. float calcs */ static REF_TRANSFORM trans[2] = { {1.1643, 0.0, 1.5960, -0.3918, -0.8129, 2.0172, 0.0}, /* BT.601 */ {1.1643, 0.0, 1.7927, -0.2132, -0.5329, 2.1124, 0.0} /* BT.709 */ }; static void R600DoneTexturedVideo(ScrnInfoPtr pScrn) { r600_finish_op(pScrn, 16); } void R600DisplayTexturedVideo(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) { RADEONInfoPtr info = RADEONPTR(pScrn); struct radeon_accel_state *accel_state = info->accel_state; PixmapPtr pPixmap = pPriv->pPixmap; BoxPtr pBox = REGION_RECTS(&pPriv->clip); int nBox = REGION_NUM_RECTS(&pPriv->clip); int dstxoff, dstyoff; cb_config_t cb_conf; tex_resource_t tex_res; tex_sampler_t tex_samp; shader_config_t vs_conf, ps_conf; /* * y' = y - .0625 * u' = u - .5 * v' = v - .5; * * r = 1.1643 * y' + 0.0 * u' + 1.5958 * v' * g = 1.1643 * y' - 0.39173 * u' - 0.81290 * v' * b = 1.1643 * y' + 2.017 * u' + 0.0 * v' * * DP3 might look like the straightforward solution * but we'd need to move the texture yuv values in * the same reg for this to work. Therefore use MADs. * Brightness just adds to the off constant. * Contrast is multiplication of luminance. * Saturation and hue change the u and v coeffs. * Default values (before adjustments - depend on colorspace): * yco = 1.1643 * uco = 0, -0.39173, 2.017 * vco = 1.5958, -0.8129, 0 * off = -0.0625 * yco + -0.5 * uco[r] + -0.5 * vco[r], * -0.0625 * yco + -0.5 * uco[g] + -0.5 * vco[g], * -0.0625 * yco + -0.5 * uco[b] + -0.5 * vco[b], * * temp = MAD(yco, yuv.yyyy, off) * temp = MAD(uco, yuv.uuuu, temp) * result = MAD(vco, yuv.vvvv, temp) */ /* TODO: calc consts in the shader */ const float Loff = -0.0627; const float Coff = -0.502; float uvcosf, uvsinf; float yco; float uco[3], vco[3], off[3]; float bright, cont, gamma; int ref = pPriv->transform_index; Bool needgamma = FALSE; float ps_alu_consts[12]; float vs_alu_consts[4]; cont = RTFContrast(pPriv->contrast); bright = RTFBrightness(pPriv->brightness); gamma = (float)pPriv->gamma / 1000.0; uvcosf = RTFSaturation(pPriv->saturation) * cos(RTFHue(pPriv->hue)); uvsinf = RTFSaturation(pPriv->saturation) * sin(RTFHue(pPriv->hue)); /* overlay video also does pre-gamma contrast/sat adjust, should we? */ yco = trans[ref].RefLuma * cont; uco[0] = -trans[ref].RefRCr * uvsinf; uco[1] = trans[ref].RefGCb * uvcosf - trans[ref].RefGCr * uvsinf; uco[2] = trans[ref].RefBCb * uvcosf; vco[0] = trans[ref].RefRCr * uvcosf; vco[1] = trans[ref].RefGCb * uvsinf + trans[ref].RefGCr * uvcosf; vco[2] = trans[ref].RefBCb * uvsinf; off[0] = Loff * yco + Coff * (uco[0] + vco[0]) + bright; off[1] = Loff * yco + Coff * (uco[1] + vco[1]) + bright; off[2] = Loff * yco + Coff * (uco[2] + vco[2]) + bright; // XXX gamma = 1.0; if (gamma != 1.0) { needgamma = TRUE; /* note: gamma correction is out = in ^ gamma; gpu can only do LG2/EX2 therefore we transform into in ^ gamma = 2 ^ (log2(in) * gamma). Lots of scalar ops, unfortunately (better solution?) - without gamma that's 3 inst, with gamma it's 10... could use different gamma factors per channel, if that's of any use. */ } /* setup the ps consts */ ps_alu_consts[0] = off[0]; ps_alu_consts[1] = off[1]; ps_alu_consts[2] = off[2]; ps_alu_consts[3] = yco; ps_alu_consts[4] = uco[0]; ps_alu_consts[5] = uco[1]; ps_alu_consts[6] = uco[2]; ps_alu_consts[7] = gamma; ps_alu_consts[8] = vco[0]; ps_alu_consts[9] = vco[1]; ps_alu_consts[10] = vco[2]; ps_alu_consts[11] = 0.0; CLEAR (cb_conf); CLEAR (tex_res); CLEAR (tex_samp); CLEAR (vs_conf); CLEAR (ps_conf); #if defined(XF86DRM_MODE) if (info->cs) { accel_state->dst_mc_addr = 0; accel_state->src_mc_addr[0] = 0; accel_state->src_bo[0] = pPriv->src_bo[pPriv->currentBuffer]; accel_state->src_bo[1] = NULL; accel_state->dst_bo = radeon_get_pixmap_bo(pPixmap); radeon_cs_space_reset_bos(info->cs); radeon_cs_space_add_persistent_bo(info->cs, accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); radeon_cs_space_add_persistent_bo(info->cs, accel_state->src_bo[0], RADEON_GEM_DOMAIN_GTT | RADEON_GEM_DOMAIN_VRAM, 0); radeon_cs_space_add_persistent_bo(info->cs, accel_state->dst_bo, 0, RADEON_GEM_DOMAIN_VRAM); if (radeon_cs_space_check(info->cs)) return; } else #endif { accel_state->dst_mc_addr = exaGetPixmapOffset(pPixmap) + info->fbLocation + pScrn->fbOffset; accel_state->src_mc_addr[0] = pPriv->src_offset + info->fbLocation + pScrn->fbOffset; } accel_state->dst_pitch = exaGetPixmapPitch(pPixmap) / (pPixmap->drawable.bitsPerPixel / 8); accel_state->src_pitch[0] = pPriv->src_pitch; /* bad pitch */ if (accel_state->src_pitch[0] & 7) return; if (accel_state->dst_pitch & 7) return; #ifdef COMPOSITE dstxoff = -pPixmap->screen_x + pPixmap->drawable.x; dstyoff = -pPixmap->screen_y + pPixmap->drawable.y; #else dstxoff = 0; dstyoff = 0; #endif radeon_vbo_check(pScrn, 16); r600_cp_start(pScrn); set_default_state(pScrn, accel_state->ib); set_generic_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); set_screen_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); set_window_scissor(pScrn, accel_state->ib, 0, 0, pPixmap->drawable.width, pPixmap->drawable.height); #if defined(XF86DRM_MODE) if (info->cs) { accel_state->vs_mc_addr = accel_state->xv_vs_offset; accel_state->ps_mc_addr = accel_state->xv_ps_offset; } else #endif { accel_state->vs_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + accel_state->xv_vs_offset; accel_state->ps_mc_addr = info->fbLocation + pScrn->fbOffset + accel_state->shaders->offset + accel_state->xv_ps_offset; } /* PS bool constant */ switch(pPriv->id) { case FOURCC_YV12: case FOURCC_I420: set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (1 << 0)); break; case FOURCC_UYVY: case FOURCC_YUY2: default: set_bool_consts(pScrn, accel_state->ib, SQ_BOOL_CONST_ps, (0 << 0)); break; } accel_state->vs_size = 512; accel_state->ps_size = 512; /* Shader */ /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, accel_state->vs_size, accel_state->vs_mc_addr, accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); vs_conf.shader_addr = accel_state->vs_mc_addr; vs_conf.num_gprs = 2; vs_conf.stack_size = 0; vs_conf.bo = accel_state->shaders_bo; vs_setup (pScrn, accel_state->ib, &vs_conf, RADEON_GEM_DOMAIN_VRAM); /* flush SQ cache */ cp_set_surface_sync(pScrn, accel_state->ib, SH_ACTION_ENA_bit, accel_state->ps_size, accel_state->ps_mc_addr, accel_state->shaders_bo, RADEON_GEM_DOMAIN_VRAM, 0); ps_conf.shader_addr = accel_state->ps_mc_addr; ps_conf.num_gprs = 3; ps_conf.stack_size = 1; ps_conf.uncached_first_inst = 1; ps_conf.clamp_consts = 0; ps_conf.export_mode = 2; ps_conf.bo = accel_state->shaders_bo; ps_setup (pScrn, accel_state->ib, &ps_conf, RADEON_GEM_DOMAIN_VRAM); /* PS alu constants */ set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_ps, sizeof(ps_alu_consts) / SQ_ALU_CONSTANT_offset, ps_alu_consts); /* Texture */ switch(pPriv->id) { case FOURCC_YV12: case FOURCC_I420: accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], accel_state->src_mc_addr[0], accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); /* Y texture */ tex_res.id = 0; tex_res.w = pPriv->w; tex_res.h = pPriv->h; tex_res.pitch = accel_state->src_pitch[0]; tex_res.depth = 0; tex_res.dim = SQ_TEX_DIM_2D; tex_res.base = accel_state->src_mc_addr[0]; tex_res.mip_base = accel_state->src_mc_addr[0]; tex_res.bo = accel_state->src_bo[0]; tex_res.mip_bo = accel_state->src_bo[0]; tex_res.format = FMT_8; tex_res.dst_sel_x = SQ_SEL_X; /* Y */ tex_res.dst_sel_y = SQ_SEL_1; tex_res.dst_sel_z = SQ_SEL_1; tex_res.dst_sel_w = SQ_SEL_1; tex_res.request_size = 1; tex_res.base_level = 0; tex_res.last_level = 0; tex_res.perf_modulation = 0; tex_res.interlaced = 0; set_tex_resource (pScrn, accel_state->ib, &tex_res, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT); /* Y sampler */ tex_samp.id = 0; tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; tex_samp.clamp_z = SQ_TEX_WRAP; /* xxx: switch to bicubic */ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; tex_samp.mip_filter = 0; /* no mipmap */ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); /* U or V texture */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0] / 4, accel_state->src_mc_addr[0] + pPriv->planev_offset, accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); tex_res.id = 1; tex_res.format = FMT_8; tex_res.w = pPriv->w >> 1; tex_res.h = pPriv->h >> 1; tex_res.pitch = RADEON_ALIGN(accel_state->src_pitch[0] >> 1, 256); tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ tex_res.dst_sel_y = SQ_SEL_1; tex_res.dst_sel_z = SQ_SEL_1; tex_res.dst_sel_w = SQ_SEL_1; tex_res.interlaced = 0; tex_res.base = accel_state->src_mc_addr[0] + pPriv->planev_offset; tex_res.mip_base = accel_state->src_mc_addr[0] + pPriv->planev_offset; set_tex_resource (pScrn, accel_state->ib, &tex_res, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT); /* U or V sampler */ tex_samp.id = 1; set_tex_sampler (pScrn, accel_state->ib, &tex_samp); /* U or V texture */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0] / 4, accel_state->src_mc_addr[0] + pPriv->planeu_offset, accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); tex_res.id = 2; tex_res.format = FMT_8; tex_res.w = pPriv->w >> 1; tex_res.h = pPriv->h >> 1; tex_res.pitch = RADEON_ALIGN(accel_state->src_pitch[0] >> 1, 256); tex_res.dst_sel_x = SQ_SEL_X; /* V or U */ tex_res.dst_sel_y = SQ_SEL_1; tex_res.dst_sel_z = SQ_SEL_1; tex_res.dst_sel_w = SQ_SEL_1; tex_res.interlaced = 0; tex_res.base = accel_state->src_mc_addr[0] + pPriv->planeu_offset; tex_res.mip_base = accel_state->src_mc_addr[0] + pPriv->planeu_offset; set_tex_resource (pScrn, accel_state->ib, &tex_res, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT); /* UV sampler */ tex_samp.id = 2; set_tex_sampler (pScrn, accel_state->ib, &tex_samp); break; case FOURCC_UYVY: case FOURCC_YUY2: default: accel_state->src_size[0] = accel_state->src_pitch[0] * pPriv->h; /* flush texture cache */ cp_set_surface_sync(pScrn, accel_state->ib, TC_ACTION_ENA_bit, accel_state->src_size[0], accel_state->src_mc_addr[0], accel_state->src_bo[0], RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT, 0); /* Y texture */ tex_res.id = 0; tex_res.w = pPriv->w; tex_res.h = pPriv->h; tex_res.pitch = accel_state->src_pitch[0] >> 1; tex_res.depth = 0; tex_res.dim = SQ_TEX_DIM_2D; tex_res.base = accel_state->src_mc_addr[0]; tex_res.mip_base = accel_state->src_mc_addr[0]; tex_res.bo = accel_state->src_bo[0]; tex_res.mip_bo = accel_state->src_bo[0]; tex_res.format = FMT_8_8; if (pPriv->id == FOURCC_UYVY) tex_res.dst_sel_x = SQ_SEL_Y; /* Y */ else tex_res.dst_sel_x = SQ_SEL_X; /* Y */ tex_res.dst_sel_y = SQ_SEL_1; tex_res.dst_sel_z = SQ_SEL_1; tex_res.dst_sel_w = SQ_SEL_1; tex_res.request_size = 1; tex_res.base_level = 0; tex_res.last_level = 0; tex_res.perf_modulation = 0; tex_res.interlaced = 0; set_tex_resource (pScrn, accel_state->ib, &tex_res, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT); /* Y sampler */ tex_samp.id = 0; tex_samp.clamp_x = SQ_TEX_CLAMP_LAST_TEXEL; tex_samp.clamp_y = SQ_TEX_CLAMP_LAST_TEXEL; tex_samp.clamp_z = SQ_TEX_WRAP; /* xxx: switch to bicubic */ tex_samp.xy_mag_filter = SQ_TEX_XY_FILTER_BILINEAR; tex_samp.xy_min_filter = SQ_TEX_XY_FILTER_BILINEAR; tex_samp.z_filter = SQ_TEX_Z_FILTER_NONE; tex_samp.mip_filter = 0; /* no mipmap */ set_tex_sampler (pScrn, accel_state->ib, &tex_samp); /* UV texture */ tex_res.id = 1; tex_res.format = FMT_8_8_8_8; tex_res.w = pPriv->w >> 1; tex_res.h = pPriv->h; tex_res.pitch = accel_state->src_pitch[0] >> 2; if (pPriv->id == FOURCC_UYVY) { tex_res.dst_sel_x = SQ_SEL_X; /* V */ tex_res.dst_sel_y = SQ_SEL_Z; /* U */ } else { tex_res.dst_sel_x = SQ_SEL_Y; /* V */ tex_res.dst_sel_y = SQ_SEL_W; /* U */ } tex_res.dst_sel_z = SQ_SEL_1; tex_res.dst_sel_w = SQ_SEL_1; tex_res.interlaced = 0; tex_res.base = accel_state->src_mc_addr[0]; tex_res.mip_base = accel_state->src_mc_addr[0]; set_tex_resource (pScrn, accel_state->ib, &tex_res, RADEON_GEM_DOMAIN_VRAM | RADEON_GEM_DOMAIN_GTT); /* UV sampler */ tex_samp.id = 1; set_tex_sampler (pScrn, accel_state->ib, &tex_samp); break; } /* Render setup */ BEGIN_BATCH(6); EREG(accel_state->ib, CB_TARGET_MASK, (0x0f << TARGET0_ENABLE_shift)); EREG(accel_state->ib, CB_COLOR_CONTROL, (0xcc << ROP3_shift)); /* copy */ END_BATCH(); cb_conf.id = 0; cb_conf.w = accel_state->dst_pitch; cb_conf.h = pPixmap->drawable.height; cb_conf.base = accel_state->dst_mc_addr; cb_conf.bo = accel_state->dst_bo; switch (pPixmap->drawable.bitsPerPixel) { case 16: if (pPixmap->drawable.depth == 15) { cb_conf.format = COLOR_1_5_5_5; cb_conf.comp_swap = 1; /* ARGB */ } else { cb_conf.format = COLOR_5_6_5; cb_conf.comp_swap = 2; /* RGB */ } break; case 32: cb_conf.format = COLOR_8_8_8_8; cb_conf.comp_swap = 1; /* ARGB */ break; default: return; } cb_conf.source_format = 1; cb_conf.blend_clamp = 1; set_render_target(pScrn, accel_state->ib, &cb_conf, RADEON_GEM_DOMAIN_VRAM); /* Interpolator setup */ /* export tex coords from VS */ BEGIN_BATCH(18); EREG(accel_state->ib, SPI_VS_OUT_CONFIG, ((1 - 1) << VS_EXPORT_COUNT_shift)); EREG(accel_state->ib, SPI_VS_OUT_ID_0, (0 << SEMANTIC_0_shift)); /* Enabling flat shading needs both FLAT_SHADE_bit in SPI_PS_INPUT_CNTL_x * *and* FLAT_SHADE_ENA_bit in SPI_INTERP_CONTROL_0 */ EREG(accel_state->ib, SPI_PS_IN_CONTROL_0, ((1 << NUM_INTERP_shift))); EREG(accel_state->ib, SPI_PS_IN_CONTROL_1, 0); EREG(accel_state->ib, SPI_PS_INPUT_CNTL_0 + (0 <<2), ((0 << SEMANTIC_shift) | (0x03 << DEFAULT_VAL_shift) | SEL_CENTROID_bit)); EREG(accel_state->ib, SPI_INTERP_CONTROL_0, 0); END_BATCH(); vs_alu_consts[0] = 1.0 / pPriv->w; vs_alu_consts[1] = 1.0 / pPriv->h; vs_alu_consts[2] = 0.0; vs_alu_consts[3] = 0.0; /* VS alu constants */ set_alu_consts(pScrn, accel_state->ib, SQ_ALU_CONSTANT_vs, sizeof(vs_alu_consts) / SQ_ALU_CONSTANT_offset, vs_alu_consts); if (pPriv->vsync) { xf86CrtcPtr crtc; if (pPriv->desired_crtc) crtc = pPriv->desired_crtc; else crtc = radeon_pick_best_crtc(pScrn, pPriv->drw_x, pPriv->drw_x + pPriv->dst_w, pPriv->drw_y, pPriv->drw_y + pPriv->dst_h); if (crtc) cp_wait_vline_sync(pScrn, accel_state->ib, pPixmap, crtc, pPriv->drw_y - crtc->y, (pPriv->drw_y - crtc->y) + pPriv->dst_h); } while (nBox--) { int srcX, srcY, srcw, srch; int dstX, dstY, dstw, dsth; float *vb; dstX = pBox->x1 + dstxoff; dstY = pBox->y1 + dstyoff; dstw = pBox->x2 - pBox->x1; dsth = pBox->y2 - pBox->y1; srcX = pPriv->src_x; srcX += ((pBox->x1 - pPriv->drw_x) * pPriv->src_w) / pPriv->dst_w; srcY = pPriv->src_y; srcY += ((pBox->y1 - pPriv->drw_y) * pPriv->src_h) / pPriv->dst_h; srcw = (pPriv->src_w * dstw) / pPriv->dst_w; srch = (pPriv->src_h * dsth) / pPriv->dst_h; vb = radeon_vbo_space(pScrn, 16); vb[0] = (float)dstX; vb[1] = (float)dstY; vb[2] = (float)srcX; vb[3] = (float)srcY; vb[4] = (float)dstX; vb[5] = (float)(dstY + dsth); vb[6] = (float)srcX; vb[7] = (float)(srcY + srch); vb[8] = (float)(dstX + dstw); vb[9] = (float)(dstY + dsth); vb[10] = (float)(srcX + srcw); vb[11] = (float)(srcY + srch); radeon_vbo_commit(pScrn); pBox++; } R600DoneTexturedVideo(pScrn); DamageDamageRegion(pPriv->pDraw, &pPriv->clip); }