diff options
author | Gabriele Gorla <gorlik@yahoo.com> | 2007-07-19 22:14:51 -0400 |
---|---|---|
committer | Alex Deucher <alex@t41p.hsd1.va.comcast.net> | 2007-07-19 22:14:51 -0400 |
commit | 041caa230b91a017881394c59d76d663d647529c (patch) | |
tree | e462163406d6f1184964aacd2f1a9f7a7fad6775 /src | |
parent | 7b080d993ceda7b181e0611ed0c997c8aecc9ad7 (diff) |
Add Xv support for Millennium II chips
Bug 11537
Diffstat (limited to 'src')
-rw-r--r-- | src/mga_video.c | 780 |
1 files changed, 778 insertions, 2 deletions
diff --git a/src/mga_video.c b/src/mga_video.c index 30ae6fc..a2d7979 100644 --- a/src/mga_video.c +++ b/src/mga_video.c @@ -63,6 +63,10 @@ static void MGAResetVideoOverlay(ScrnInfoPtr); static void MGAVideoTimerCallback(ScrnInfoPtr pScrn, Time time); +static XF86VideoAdaptorPtr MGASetupImageVideoILOAD(ScreenPtr); +static int MGAPutImageILOAD(ScrnInfoPtr, short, short, short, short, short, + short, short, short, int, unsigned char*, short, + short, Bool, RegionPtr, pointer, DrawablePtr); #define MAKE_ATOM(a) MakeAtom(a, sizeof(a) - 1, TRUE) @@ -91,12 +95,27 @@ void MGAInitVideo(ScreenPtr pScreen) if((pScrn->bitsPerPixel != 8) && !pMga->NoAccel && (pMga->SecondCrtc == FALSE) && - ((pMga->Chipset == PCI_CHIP_MGAG200) || + ((pMga->Chipset == PCI_CHIP_MGA2164) || + (pMga->Chipset == PCI_CHIP_MGA2164_AGP) || +/* (pMga->Chipset == PCI_CHIP_MGA2064) || */ + (pMga->Chipset == PCI_CHIP_MGAG200) || (pMga->Chipset == PCI_CHIP_MGAG200_PCI) || (pMga->Chipset == PCI_CHIP_MGAG400) || (pMga->Chipset == PCI_CHIP_MGAG550))) { - if((pMga->Overlay8Plus24 || pMga->TexturedVideo) && + if( (pMga->Chipset == PCI_CHIP_MGA2164) || +/* (pMga->Chipset == PCI_CHIP_MGA2064) || */ + (pMga->Chipset == PCI_CHIP_MGA2164_AGP) + ) { + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Using MGA 2164W ILOAD video\n"); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "This is an experimental driver and may not work on your machine.\n"); + + newAdaptor = MGASetupImageVideoILOAD(pScreen); + pMga->TexturedVideo = TRUE; + /* ^^^ this is not really true but the ILOAD scaler shares + much more code with the textured video than the overlay */ + } else if((pMga->Overlay8Plus24 || pMga->TexturedVideo) && (pScrn->bitsPerPixel != 24)) { xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Using texture video\n"); @@ -1258,3 +1277,760 @@ MGAInitOffscreenImages(ScreenPtr pScreen) xf86XVRegisterOffscreenImages(pScreen, offscreenImages, num); } + + +/* Matrox MGA 2164W Xv extension support. +* The extension is implemented as a HOST->FB image load in YUV format. +* I decided not to use real hardware overlay since on the Millennium II +* it would limit the size of the frame buffer to 4Mb (even on a 16Mb +* card) due to an hardware limitation. +* Author: Gabriele Gorla (gorlik@yahoo.com) +* Based on the MGA-Gxxx Xv extension by: Mark Vojkovich + */ + +/* This code is still in alpha stage. Only YUV->RGB conversion + and horizontal scaling are hardware accelerated. + All 4 FOURCC formats supported by X should be supported. + It has been tested only on my DEC XP1000 at 1024x768x32 under + linux 2.6.18 with X.org 7.1.1 (debian alpha) + + Bug reports and success/failure stories are greatly appreciated. +*/ + +/* #define DEBUG_MGA2164 */ +/* #define CUSTOM_MEMCOPY */ +#define MGA2164_SWFILTER + + +static XF86VideoAdaptorPtr +MGASetupImageVideoILOAD(ScreenPtr pScreen) +{ + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + XF86VideoAdaptorPtr adapt; + MGAPtr pMga = MGAPTR(pScrn); + + adapt = MGAAllocAdaptor(pScrn, FALSE); + + adapt->type = XvWindowMask | XvInputMask | XvImageMask; + adapt->flags = 0; + adapt->name = "Matrox Millennium II ILOAD Video Engine"; + adapt->nEncodings = 1; + adapt->pEncodings = &DummyEncoding[1]; + adapt->nFormats = NUM_FORMATS; + adapt->pFormats = Formats; + adapt->nPorts = MGA_MAX_PORTS; + adapt->pAttributes = NULL; + adapt->nAttributes = 0; + + /* number of supported color formats */ + adapt->pImages = Images; + adapt->nImages = 4; + + adapt->PutVideo = NULL; + adapt->PutStill = NULL; + adapt->GetVideo = NULL; + adapt->GetStill = NULL; + adapt->StopVideo = MGAStopVideo; + + adapt->SetPortAttribute = MGASetPortAttributeTexture; + adapt->GetPortAttribute = MGAGetPortAttributeTexture; + adapt->QueryBestSize = MGAQueryBestSize; + adapt->PutImage = MGAPutImageILOAD; + adapt->QueryImageAttributes = MGAQueryImageAttributes; + + REGION_INIT(pScreen, &(pMga->portPrivate->clip), NullBox, 0); + + return adapt; +} + +/* this function is optimized for alpha. It might be better also for +other load/store risc architectures but I never tested on anything else +than my ev56 */ +static void CopyMungedScanline_AXP(CARD32 *fb_ptr, short src_w, + CARD32 *tsp, CARD32 *tpu, CARD32 *tpv) +{ + CARD32 k,y0,y1,u,v; + + for(k=src_w/8;k;k--) { + y0=*tsp; + y1=*(tsp+1); + u=*tpu; + v=*tpv; + + *(fb_ptr)=(y1&0x000000ff)|((y1&0x0000ff00)<<8) | + (v&0x00ff0000)<<8 | (u&0x00ff0000)>>8; + *(fb_ptr+1)=(y1&0x000000ff)|((y1&0x0000ff00)<<8) | + (v&0x00ff0000)<<8 | (u&0x00ff0000)>>8; + + *(fb_ptr+2)=(y0&0x000000ff)|((y0&0x0000ff00)<<8) | + (v&0x000000ff)<<24 | (u&0x000000ff)<<8; + *(fb_ptr+3)=(y0&0x000000ff)|((y0&0x0000ff00)<<8) | + (v&0x000000ff)<<24 | (u&0x000000ff)<<8; + + /*correct below*/ + /* *(fb_ptr)=(y0&0x000000ff)|((y0&0x0000ff00)<<8) | + (v&0x000000ff)<<24 | (u&0x000000ff)<<8; + *(fb_ptr+1)=((y0&0x00ff0000)>>16)|((y0&0xff000000)>>8) | + (v&0x0000ff00)<<16 | (u&0x0000ff00); + *(fb_ptr+2)=(y1&0x000000ff)|((y1&0x0000ff00)<<8) | + (v&0x00ff0000)<<8 | (u&0x00ff0000)>>8; + *(fb_ptr+3)=((y1&0x00ff0000)>>16)|((y1&0xff000000)>>8) | + (v&0xff000000) | (u&0xff000000)>>16; */ + + tsp+=2; tpu++; tpv++; + fb_ptr+=4; + } +} + +static void CopyMungedScanline_AXP2(CARD32 *fb_ptr, short src_w, + CARD32 *tsp, CARD32 *tpu, CARD32 *tpv) +{ + CARD8 *y, *u, *v; + int k; + y=(CARD8 *)tsp; + u=(CARD8 *)tpu; + v=(CARD8 *)tpv; + + for(k=src_w/8;k;k--) { + fb_ptr[0]=y[0] | y[1]<<16 | v[0]<<24 | u[0]<<8; + fb_ptr[1]=y[2] | y[3]<<16 | v[1]<<24 | u[1]<<8; + fb_ptr[2]=y[4] | y[5]<<16 | v[2]<<24 | u[2]<<8; + fb_ptr[3]=y[6] | y[7]<<16 | v[3]<<24 | u[3]<<8; + + y+=8; u+=4; v+=4; + fb_ptr+=4; + } +} + + +static void CopyMungedScanlineFilter_AXP(CARD32 *fb_ptr, short src_w, + CARD32 *tsp1, CARD32 *tpu1, CARD32 *tpv1, + CARD32 *tsp2, CARD32 *tpu2, CARD32 *tpv2, + int beta, int xds ) +{ + unsigned int k,y0_1,y1_1,y0_2,y1_2,u,v; + int yf[8], uf[4], vf[4]; + int oneminbeta = 0xff - beta; + + for(k=xds*src_w/8;k;k--) { + y0_1=*tsp1; + y1_1=*(tsp1+1); + y0_2=*tsp2; + y1_2=*(tsp2+1); + u=*tpu1; + v=*tpv1; + + tsp1+=2; tsp2+=2; tpu1++; tpv1++; + yf[0] = ((y0_1&0x000000ff)*oneminbeta + (y0_2&0x000000ff)*beta )>>8; + yf[1] = (((y0_1&0x0000ff00)>>8)*oneminbeta + ((y0_2&0x0000ff00)>>8)*beta )>>8; + yf[2] = (((y0_1&0x00ff0000)>>16)*oneminbeta + ((y0_2&0x00ff0000)>>16)*beta )>>8; + yf[3] = (((y0_1&0xff000000)>>24)*oneminbeta + ((y0_2&0xff000000)>>24)*beta )>>8; + yf[4] = ((y1_1&0x000000ff)*oneminbeta + (y1_2&0x000000ff)*beta )>>8; + yf[5] = (((y1_1&0x0000ff00)>>8)*oneminbeta + ((y1_2&0x0000ff00)>>8)*beta )>>8; + yf[6] = (((y1_1&0x00ff0000)>>16)*oneminbeta + ((y1_2&0x00ff0000)>>16)*beta )>>8; + yf[7] = (((y1_1&0xff000000)>>24)*oneminbeta + ((y1_2&0xff000000)>>24)*beta )>>8; + + /* FIXME: there is still no filtering on u and v */ + uf[0]=(u&0x000000ff); + uf[1]=(u&0x0000ff00)>>8; + uf[2]=(u&0x00ff0000)>>16; + uf[3]=(u&0xff000000)>>24; + + vf[0]=(v&0x000000ff); + vf[1]=(v&0x0000ff00)>>8; + vf[2]=(v&0x00ff0000)>>16; + vf[3]=(v&0xff000000)>>24; + + switch(xds) { + case 1: + *(fb_ptr)=(yf[0]) | (yf[1]<<16) | + vf[0]<<24 | uf[0]<<8; + *(fb_ptr+1)=(yf[2]) | (yf[3]<<16) | + vf[1]<<24 | uf[1]<<8; + *(fb_ptr+2)=(yf[4]) | (yf[5]<<16) | + vf[2]<<24 | uf[2]<<8; + *(fb_ptr+3)=(yf[6]) | (yf[7]<<16) | + vf[3]<<24 | uf[3]<<8; + fb_ptr+=4; + break; + + case 2: + *(fb_ptr)=(yf[0]+yf[1])/2 | (((yf[2]+yf[3])/2)<<16) | + ((vf[0]+vf[1])/2 )<<24 | ((uf[0]+uf[1])/2)<<8; + *(fb_ptr+1)=(yf[4]+yf[5])/2 | ( ((yf[6]+yf[7])/2) <<16) | + ((vf[2]+vf[3])/2 )<<24 | ((uf[2]+uf[3])/2)<<8; + fb_ptr+=2; + break; + + case 4: + *(fb_ptr)=(yf[0]+yf[1]+yf[2]+yf[3])/4 | (((yf[4]+yf[5]+yf[6]+yf[7])/4)<<16) | + ((vf[0]+vf[1]+vf[2]+vf[3])/4 )<<24 | ((uf[0]+uf[1]+uf[2]+uf[3])/4)<<8; + fb_ptr+=1; + break; + + default: + break; + } + } +} + +static void CopyMungedScanlineFilterDown_AXP(CARD32 *fb_ptr, short src_w, + CARD32 *tsp1, CARD32 *tpu1, CARD32 *tpv1, + CARD32 *tsp2, CARD32 *tpu2, CARD32 *tpv2, + int beta , int xds) +{ + unsigned int k,y0_1,y1_1,y0_2,y1_2,u,v; + int yf[8], uf[4], vf[4]; + + for(k=src_w/8;k;k--) { + y0_1=*tsp1; + y1_1=*(tsp1+1); + y0_2=*tsp2; + y1_2=*(tsp2+1); + u=*tpu1; + v=*tpv1; + + tsp1+=2; tsp2+=2; tpu1++; tpv1++; + yf[0] = ((y0_1&0x000000ff) + (y0_2&0x000000ff))>>8; + yf[1] = (((y0_1&0x0000ff00)>>8) + ((y0_2&0x0000ff00)>>8))>>8; + yf[2] = (((y0_1&0x00ff0000)>>16) + ((y0_2&0x00ff0000)>>16))>>8; + yf[3] = (((y0_1&0x000000ff)>>24) + ((y0_2&0x000000ff)>>24))>>8; + yf[4] = ((y1_1&0x000000ff) + (y1_2&0x000000ff))>>8; + yf[5] = (((y1_1&0x0000ff00)>>8) + ((y1_2&0x0000ff00)>>8))>>8; + yf[6] = (((y1_1&0x00ff0000)>>16) + ((y1_2&0x00ff0000)>>16))>>8; + yf[7] = (((y1_1&0x000000ff)>>24) + ((y1_2&0x000000ff)>>24))>>8; + + *(fb_ptr)=(yf[0]) | (yf[1]<<16) | + (v&0x000000ff)<<24 | (u&0x000000ff)<<8; + *(fb_ptr+1)=(yf[2]) | (yf[3]<<16) | + (v&0x0000ff00)<<16 | (u&0x0000ff00); + *(fb_ptr+2)=(yf[4]) | (yf[5]<<16) | + (v&0x00ff0000)<<8 | (u&0x00ff0000)>>8; + *(fb_ptr+3)=(yf[6]) | (yf[7]<<16) | + (v&0xff000000) | (u&0xff000000)>>16; + + fb_ptr+=4; + } +} + +static void MGACopyScaledILOAD( + ScrnInfoPtr pScrn, + int id, unsigned char *buf, + BoxPtr pbox, + int width, int height, int pitch, + short src_x, short src_y, + short src_w, short src_h, + short drw_x, short drw_y, + short drw_w, short drw_h + ) +{ + MGAPtr pMga = MGAPTR(pScrn); + CARD32 *fb_ptr; + unsigned char *ubuf, *vbuf, *tbuf; + CARD32 *pu, *pv; + int k,l, pl, dl, xds, yds; + short box_h; + short scr_pitch = ( pScrn->virtualX + 15) & ~15; + +#ifdef DEBUG_MGA2164 + char sbuf[255]; + + sprintf(sbuf,"---- PBOX: x1=%d y1=%d w=%d h=%d (x2=%d y2=%d)\n", + pbox->x1,pbox->y1,pbox->x2-pbox->x1,pbox->y2-pbox->y1, + pbox->x2,pbox->y2); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); + + sprintf(sbuf,"in src: src_x=%d src_y=%d src_w=%d src_h=%d\n", + src_x,src_y,src_w,src_h); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); + sprintf(sbuf,"in drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n", + drw_x,drw_y,drw_w,drw_h); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); +#endif + + /* scaling yuv->rgb */ + + /* hack to force width and src image to be 8 pixel aligned */ + src_x&=~0x7; + src_w&=~0x7; + + box_h=pbox->y2-pbox->y1; + + /* compute X down scaling factor */ + if(src_w>drw_w) { + if(src_w/2<drw_w) { + xds=2; + } else if(src_w/4<drw_w) { + xds=4; + } else { xds=8; } + } else xds = 1; + + /* prevent crashing when dragging window outside left boundary of screen */ + /* FIXME: need to implement per pixel left start to avoid undesired + effects when dragging window outside left screen boundary */ + + if(drw_x<0) { + src_x=( -(drw_x*src_w)/drw_w + 0x7)&~0x7; + src_w-=src_x; + drw_w+=drw_x; + drw_x=0; + } + + src_w/=xds; + + /* compute X down scaling factor */ + if(src_h>drw_h) { + if(src_h/2<drw_h) { + yds=2; + } else if(src_h/4<drw_h) { + yds=4; + } else { yds=8; } + } else yds = 1; + + +#ifdef DEBUG_MGA2164 + char sbuf[255]; + + sprintf(sbuf,"---- xds = %d\n", + xds); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); +#endif + + +#ifdef DEBUG_MGA2164 + sprintf(sbuf,"out src: src_x=%d src_y=%d src_w=%d src_h=%d\n", + src_x,src_y,src_w,src_h); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); + sprintf(sbuf,"out drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n", + drw_x,drw_y,drw_w,drw_h); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); +#endif + + CHECK_DMA_QUIESCENT(pMga, pScrn); + + /* scaling ILOAD */ + + vbuf=buf+width*height; + ubuf=vbuf+width*height/4; + pu = (CARD32 *)(ubuf+(src_y/2)*(width/2)); + pv = (CARD32 *)(vbuf+(src_y/2)*(width/2)); + + for(pl=-1,dl=0;dl<box_h;dl++) { + int beta; + l=(dl+(pbox->y1-drw_y))*src_h/drw_h; + /* FIXME: check the math */ + beta = ((dl+(pbox->y1-drw_y))*src_h*0xff/drw_h) - ((dl+(pbox->y1-drw_y))*src_h/drw_h*0xff); + +#ifdef MGA2164_BLIT_DUP + if(l!=pl) +#else + if(1) +#endif + { + + /* + #ifdef DEBUG_MGA2164 + sprintf(sbuf,"new line: scr_dst %d img_src %d prev %d\n", + dl,l,pl); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); + #endif + */ + + OUTREG(MGAREG_DWGCTL, MGADWG_ILOAD_HIQH | MGADWG_BUYUV | MGADWG_SHIFTZERO + | MGADWG_SGNZERO | 0xc0000); + + OUTREG(MGAREG_AR0, pbox->x1 + drw_w -1); /* SRC LINE END why -1 ? */ + OUTREG(MGAREG_AR2, ( ( (src_w-1)<<16) / (drw_w-1)) + 1 ); /* ((SRC_X_DIM -1)<<16) / (DST_X_DIM-1) +1 */ + OUTREG(MGAREG_AR3, pbox->x1 ); /* SRC LINE START*/ + OUTREG(MGAREG_AR5, scr_pitch); /* DST_Y_INCR = PITCH? */ + OUTREG(MGAREG_AR6, ((src_w-drw_w)<<16) / (drw_w-1) ); /* */ + OUTREG(MGAREG_FXBNDRY, drw_x|((drw_x+drw_w-1)<<16) ); /* why -1 ? */ + OUTREG(MGAREG_CXBNDRY, pbox->x1 | ((pbox->x2-1)<<16 ) ); + OUTREG(MGAREG_YDST , pbox->y1+dl ); /* Y_START_POS */ + OUTREG(MGAREG_LEN + MGAREG_EXEC , 1); /* # of LINES */ + + /* xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Data finished\n"); */ + + fb_ptr=(CARD32 *)pMga->ILOADBase; + + switch(id) { + case FOURCC_YV12: + case FOURCC_I420: + tbuf=buf+(l+src_y)*width; + { + CARD32 *tpu=pu+src_x/8+l/2*width/8; + CARD32 *tpv=pv+src_x/8+l/2*width/8; + CARD32 *tsp=(CARD32 *)(tbuf+src_x), *tsp2; + + if((l+src_y)<(src_h-1)) + tsp2=(CARD32 *)(tbuf+src_x+width); + else + tsp2=(CARD32 *)(tbuf+src_x); + + /* it is not clear if waiting is actually good for performance */ + /* WAITFIFO(pMga->FifoSize);*/ + /* should try to get MGACopyMunged data to work here */ + /* CopyMungedScanline_AXP(fb_ptr,src_w,tsp,tpu,tpv); */ + + /* Filter does not work yet */ + CopyMungedScanlineFilter_AXP(fb_ptr,src_w,tsp,tpu,tpv,tsp2,tpu,tpv, beta, xds); + /* if(l&1) { + pu+=width/8; + pv+=width/8; + } */ + } + break; + case FOURCC_UYVY: + case FOURCC_YUY2: + tbuf=buf+(l+src_y)*width*2; + +#ifndef MGA2164_SWFILTER + WAITFIFO(pMga->FifoSize/2); + memcpy(fb_ptr, tbuf+src_x*2, src_w*2); + fb_ptr+=src_w*2; /* pointer in the pseudo dma window */ +#else + { + CARD32 *tsp=(CARD32 *)(tbuf+src_x*2), *tsp2; + + if((l+src_y)<(src_h-1)) + tsp2=(CARD32 *)(tbuf+src_x*2+width*2); + else + tsp2=(CARD32 *)(tbuf+src_x*2); + /* { + char sbuf [256]; + sprintf(sbuf,"dst line: %d src_line: %d beta: %x\n", + dl, l, beta ); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); + } */ + + WAITFIFO(pMga->FifoSize/4); + for(k=xds*src_w/8;k;k--) { + int oneminbeta = 0xff-beta; + int y[8], u[4], v[4], ya[4], ua[2], va[2], p; + + switch(yds) { + case 1: + /* upscale y filter */ + for(p=0;p<4;p++) { + y[2*p]=(((*(tsp+p)&0x000000ff))*oneminbeta+((*(tsp2+p)&0x000000ff))*beta)>>8; + y[2*p+1]=(((*(tsp+p)&0x00ff0000)>>16)*oneminbeta+((*(tsp2+p)&0x00ff0000)>>16)*beta)>>8; + u[p]=(((*(tsp+p)&0x0000ff00)>>8)*oneminbeta+((*(tsp2+p)&0x0000ff00)>>8)*beta)>>8; + v[p]=(((*(tsp+p)&0xff000000)>>24)*oneminbeta+((*(tsp2+p)&0xff000000)>>24)*beta)>>8; + } + break; + /* downscale y filter */ + case 2: + case 3: + case 4: + default: + for(p=0;p<4;p++) { + y[2*p]=(((*(tsp+p)&0x000000ff))); + y[2*p+1]=(((*(tsp+p)&0x00ff0000)>>16)); + u[p]=(((*(tsp+p)&0x0000ff00)>>8)); + v[p]=(((*(tsp+p)&0xff000000)>>24)); + } + break; + } + + switch (xds) { + case 1: /* simple copy */ + *(fb_ptr++)=y[0]|y[1]<<16|u[0]<<8|v[0]<<24; + *(fb_ptr++)=y[2]|y[3]<<16|u[1]<<8|v[1]<<24; + *(fb_ptr++)=y[4]|y[5]<<16|u[2]<<8|v[2]<<24; + *(fb_ptr++)=y[6]|y[7]<<16|u[3]<<8|v[3]<<24; + break; + case 2: /* dowscale by 2 */ + ya[0]=(y[0]+y[1])>>1; + ya[1]=(y[2]+y[3])>>1; + ya[2]=(y[4]+y[5])>>1; + ya[3]=(y[6]+y[7])>>1; + ua[0]=(u[0]+u[1])>>1; + ua[1]=(u[2]+u[3])>>1; + va[0]=(v[0]+v[1])>>1; + va[1]=(v[2]+v[3])>>1; + *(fb_ptr++)=ya[0]|ya[1]<<16|ua[0]<<8|va[0]<<24; + *(fb_ptr++)=ya[2]|ya[3]<<16|ua[1]<<8|va[1]<<24; + break; + case 4: /* downscale by 4 */ + ya[0]=(y[0]+y[1]+y[2]+y[3])>>2; + ya[1]=(y[4]+y[5]+y[6]+y[7])>>2; + ua[0]=(u[0]+u[1]+u[2]+u[3])>>2; + va[0]=(v[0]+v[1]+v[2]+v[3])>>2; + *(fb_ptr++)=ya[0]|ya[1]<<16|ua[0]<<8|va[0]<<24; + break; + case 8: + default: + break; + } + + /* fb_ptr+=4; */ + tsp+=4; tsp2+=4; + } + } +#endif /* MGA2164_SWFILTER */ + break; + default: + break; + } + pl=l; + } else { + /* dup lines */ + +#ifdef DEBUG_MGA2164 + sprintf(sbuf,"dup line: scr_src %d scr_dst %d\n", + dl-1,dl); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); +#endif + + OUTREG(MGAREG_DWGCTL, 0x040C6008); + OUTREG(MGAREG_FXBNDRY, pbox->x1|((pbox->x2-1)<<16) ); /* why -1 ? */ + OUTREG(MGAREG_AR3, (pbox->y1+dl-1)*scr_pitch+pbox->x1 ); /* SRC LINE START*/ + OUTREG(MGAREG_AR0, (pbox->y1+dl-1)*scr_pitch+pbox->x2 -1); /* SRC LINE END why -1 ? */ + OUTREG(MGAREG_AR5, scr_pitch); /* DST_Y_INCR = PITCH? */ + OUTREG(MGAREG_YDST , pbox->y1+dl); /* Y_START_POS */ + OUTREG(MGAREG_LEN + MGAREG_EXEC , 1); /* # of LINES */ + } + } + OUTREG(MGAREG_CXBNDRY, 0xFFFF0000); +} + +static void MGACopyILOAD( + ScrnInfoPtr pScrn, + int id, unsigned char *buf, + BoxPtr pbox, + int width, int height, int pitch, + short src_x, short src_y, + short src_w, short src_h, + short drw_x, short drw_y, + short drw_w, short drw_h + ) +{ + MGAPtr pMga = MGAPTR(pScrn); + CARD32 *fb_ptr; + CARD8 *ubuf, *vbuf; + CARD32 *pu, *pv; + int k,l; + short clip_x1, clip_x2, tmp_w; + +#ifdef DEBUG_MGA2164 + char sbuf[255]; + + sprintf(sbuf,"---- PBOX: x1=%d y1=%d w=%d h=%d (x2=%d y2=%d)\n", + pbox->x1,pbox->y1,pbox->x2-pbox->x1,pbox->y2-pbox->y1, + pbox->x2,pbox->y2); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); + + sprintf(sbuf,"in src: src_x=%d src_y=%d src_w=%d src_h=%d\n", + src_x,src_y,src_w,src_h); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); + sprintf(sbuf,"in drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n", + drw_x,drw_y,drw_w,drw_h); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); +#endif + + /* non-scaling yuv->rgb */ + + /* hack to force width and src image to be 8 pixel aligned */ + src_x&=~0x7; + src_w&=~0x7; + drw_w&=~0x7; + tmp_w=drw_w; + clip_x1=drw_x; + clip_x2=drw_x+drw_w; + + /* hack for clipping in non scaling version */ + /* this works only if no scaling */ + if(pbox->x1 > drw_x) { /* left side X clipping*/ + src_x+=((pbox->x1-drw_x)&~0x7); + src_w-=((pbox->x1-drw_x)&~0x7); + clip_x1=pbox->x1; + drw_x+=src_x; + drw_w=src_w; + } + + if( (pbox->x2) < (drw_x+drw_w) ) { /* right side X clipping */ + tmp_w=( (pbox->x2) - drw_x ); + drw_w= tmp_w & (~0x7); + if(drw_w!=tmp_w) drw_w+=8; + clip_x2=drw_x+tmp_w-1; /* not sure why needs -1 */ + src_w=drw_w; + } + + if(pbox->y1 > drw_y) { /* top side Y clipping */ + src_y+=(pbox->y1-drw_y); + src_h-=(pbox->y1-drw_y); + drw_y+=src_y; + drw_h=src_h; + } + if((pbox->y2)<(drw_y+drw_h)) { /* bottom side Y clipping */ + drw_h=(pbox->y2)-drw_y; + src_h=drw_h; + } + + if(drw_x<0) drw_x=0; + +#ifdef DEBUG_MGA2164 + sprintf(sbuf,"out src: src_x=%d src_y=%d src_w=%d src_h=%d\n", + src_x,src_y,src_w,src_h); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); + sprintf(sbuf,"out drw: drw_x=%d drw_y=%d drw_w=%d drw_h=%d\n", + drw_x,drw_y,drw_w,drw_h); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); +#endif + + /* ready to draw */ + if(drw_w==0||drw_h==0) return; + + if(drw_w<0||drw_h<0) { + /* actually until scaling is working this might happen + during normal operation */ + /* sprintf(sbuf,"drw_w or drw_h are negative (this should never + happen)\n"); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, sbuf); */ + return; + } + + CHECK_DMA_QUIESCENT(pMga, pScrn); + + /* non scaling ILOAD */ + WAITFIFO(6); + OUTREG(MGAREG_AR5, 0); + OUTREG(MGAREG_DWGCTL, MGADWG_ILOAD | MGADWG_BUYUV | MGADWG_SHIFTZERO + | MGADWG_SGNZERO | 0xc0000); + OUTREG(MGAREG_AR0, (drw_w)-1 ); + OUTREG(MGAREG_AR3, 0); + OUTREG(MGAREG_CXBNDRY, clip_x1|(clip_x2<<16)); + OUTREG(MGAREG_FXBNDRY, drw_x|((drw_x+drw_w-1)<<16)); + OUTREG(MGAREG_YDSTLEN + MGAREG_EXEC , (drw_y<<16)|drw_h); + + fb_ptr=(CARD32 *)pMga->ILOADBase; + vbuf=buf+width*height; + ubuf=vbuf+width*height/4; + + switch(id) { + case FOURCC_YV12: + case FOURCC_I420: + pu = (CARD32 *)(ubuf+(src_y/2)*(width/2)); + pv = (CARD32 *)(vbuf+(src_y/2)*(width/2)); + buf+=src_y*width; + + for(l=0;l<drw_h;l++) { + CARD32 *tpu=pu+src_x/8; + CARD32 *tpv=pv+src_x/8; + CARD32 *tsp=(CARD32 *)(buf+src_x); + + /* it is not clear if waiting is actually good for performance */ + /* WAITFIFO(pMga->FifoSize);*/ + /* should try to get MGACopyMunged data to work here */ + CopyMungedScanline_AXP(fb_ptr,src_w,tsp,tpu,tpv); + buf+=width; + if(l&1) { + pu+=width/8; + pv+=width/8; + } + } + break; + case FOURCC_UYVY: + case FOURCC_YUY2: + buf+=src_y*width*2; + for(l=0;l<drw_h;l++) { + +#ifndef CUSTOM_MEMCOPY + WAITFIFO(pMga->FifoSize/2); /* not sure what's the value for best performance */ + memcpy(fb_ptr, buf+src_x*2, src_w*2); + fb_ptr+=src_w*2; +#else + CARD32 *tsp=(CARD32 *)(buf+src_x*2); + WAITFIFO(pMga->FifoSize/4); + for(k=src_w/8;k;k--) { + *(fb_ptr)=*(tsp); + *(fb_ptr+1)=*(tsp+1); + *(fb_ptr+2)=*(tsp+2); + *(fb_ptr+3)=*(tsp+3); + fb_ptr+=4; tsp+=4; + } +#endif /* CUSTOM_MEMCOPY */ + buf+=width*2; + } + break; + default: + break; + } + OUTREG(MGAREG_CXBNDRY, 0xFFFF0000); /* put clipping back to normal */ +} + +static int +MGAPutImageILOAD( + ScrnInfoPtr pScrn, + short src_x, short src_y, + short drw_x, short drw_y, + short src_w, short src_h, + short drw_w, short drw_h, + int id, unsigned char* buf, + short width, short height, + Bool Sync, + RegionPtr clipBoxes, pointer data, + DrawablePtr pDraw + ){ + MGAPtr pMga = MGAPTR(pScrn); + MGAPortPrivPtr pPriv = pMga->portPrivate; + INT32 x1, x2, y1, y2; + int dstPitch = 0; + int bpp; + BoxRec dstBox; + int nbox; + BoxPtr pbox; + + /* Clip */ + x1 = src_x; x2 = src_x + src_w; + y1 = src_y; y2 = src_y + src_h; + + dstBox.x1 = drw_x; dstBox.x2 = drw_x + drw_w; + dstBox.y1 = drw_y; dstBox.y2 = drw_y + drw_h; + + if(!xf86XVClipVideoHelper(&dstBox, &x1, &x2, &y1, &y2, + clipBoxes, width, height)) + return Success; + + bpp = pScrn->bitsPerPixel >> 3; + + if( pMga->AccelInfoRec->NeedToSync && ((long)data != pPriv->lastPort) ) { + MGAStormSync(pScrn); + } + + pPriv->lastPort = (long)data; + nbox=REGION_NUM_RECTS(clipBoxes); + pbox=REGION_RECTS(clipBoxes); + + while(nbox--) { + + if ( (drw_w==src_w) && (drw_h==src_h) && (drw_x >= 0 ) ) { + /* special case 1: non scaling optimization */ + MGACopyILOAD(pScrn,id,buf,pbox, + width, height, dstPitch, src_x, src_y, src_w, src_h, + drw_x, drw_y, drw_w, drw_h); +#if 0 + } else if ( (drw_w>src_w) && (drw_h>src_h) && (drw_x >= 0 ) ) { + /* special case 2: upscaling for full screen apps */ + /* FIXME: to do */ + MGACopyScaledILOAD(pScrn,id,buf,pbox, + width, height, dstPitch, src_x, src_y, src_w, src_h, + drw_x, drw_y, drw_w, drw_h); + +#endif + } else /* generic fallback case */ + MGACopyScaledILOAD(pScrn,id,buf,pbox, + width, height, dstPitch, src_x, src_y, src_w, src_h, + drw_x, drw_y, drw_w, drw_h); + /* FIXME: when the generic is perfect I will enable the optimizations */ + pbox++; + } + + + pMga->AccelInfoRec->NeedToSync = TRUE; + pPriv->videoStatus = FREE_TIMER; + pPriv->freeTime = currentTime.milliseconds + FREE_DELAY; + pMga->VideoTimerCallback = MGAVideoTimerCallback; + + return Success; +} |