diff options
-rw-r--r-- | man/radeon.man | 11 | ||||
-rw-r--r-- | src/radeon.h | 103 | ||||
-rw-r--r-- | src/radeon_accel.c | 190 | ||||
-rw-r--r-- | src/radeon_accelfuncs.c | 126 | ||||
-rw-r--r-- | src/radeon_commonfuncs.c | 186 | ||||
-rw-r--r-- | src/radeon_cursor.c | 91 | ||||
-rw-r--r-- | src/radeon_dga.c | 91 | ||||
-rw-r--r-- | src/radeon_dri.c | 85 | ||||
-rw-r--r-- | src/radeon_driver.c | 830 | ||||
-rw-r--r-- | src/radeon_exa.c | 463 | ||||
-rw-r--r-- | src/radeon_exa_funcs.c | 408 | ||||
-rw-r--r-- | src/radeon_exa_render.c | 799 | ||||
-rw-r--r-- | src/radeon_mergedfb.c | 20 | ||||
-rw-r--r-- | src/radeon_reg.h | 75 | ||||
-rw-r--r-- | src/radeon_render.c | 141 | ||||
-rw-r--r-- | src/radeon_video.c | 324 | ||||
-rw-r--r-- | src/radeon_video.h | 8 |
17 files changed, 3201 insertions, 750 deletions
diff --git a/man/radeon.man b/man/radeon.man index b08ba069..d594d5a7 100644 --- a/man/radeon.man +++ b/man/radeon.man @@ -501,6 +501,17 @@ unsupported). The default is to .B enable Render acceleration. .TP +.BI "Option \*qAccelMethod\*q \*q" "string" \*q +Chooses between available acceleration architectures. Valid options are +.B XAA +and +.B EXA. +XAA is the traditional acceleration architecture and support for it is very +stable. EXA is a newer acceleration architecture with better performance for +the Render and Composite extensions, but the rendering code for it is newer and +possibly unstable. The default is +.B XAA. +.TP .BI "Option \*qDMAForXv\*q \*q" boolean \*q Try or don't try to use DMA for Xv image transfers. This will reduce CPU usage when playing big videos like DVDs, but may cause instabilities. diff --git a/src/radeon.h b/src/radeon.h index b1060cdc..71504874 100644 --- a/src/radeon.h +++ b/src/radeon.h @@ -43,8 +43,15 @@ /* PCI support */ #include "xf86Pci.h" - /* XAA and Cursor Support */ +#ifdef USE_EXA +#include "exa.h" +#endif +#ifdef USE_XAA #include "xaa.h" +#include "xf86fbman.h" +#endif + + /* Exa and Cursor Support */ #include "vbe.h" #include "xf86Cursor.h" @@ -122,6 +129,7 @@ do { \ #define RADEON_ALIGN(x,bytes) (((x) + ((bytes) - 1)) & ~((bytes) - 1)) #define RADEONPTR(pScrn) ((RADEONInfoPtr)(pScrn)->driverPrivate) + typedef struct { /* Common registers */ CARD32 ovr_clr; @@ -382,11 +390,25 @@ typedef struct { Bool PaletteSavedOnVT; /* Palette saved on last VT switch */ +#ifdef USE_EXA + ExaDriverRec exa; +#endif +#ifdef USE_XAA XAAInfoRecPtr accel; + int engineMode; +#define EXA_ENGINEMODE_UNKNOWN 0 +#define EXA_ENGINEMODE_2D 1 +#define EXA_ENGINEMODE_3D 2 +#endif Bool accelOn; xf86CursorInfoPtr cursor; - unsigned long cursor_start; +#ifdef USE_EXA + ExaOffscreenArea *cursorArea; +#endif + unsigned long cursor_offset; +#ifdef USE_XAA unsigned long cursor_end; +#endif Bool allowColorTiling; Bool tilingEnabled; /* mirror of 
sarea->tiling_enabled */ #ifdef ARGB_CURSOR @@ -395,11 +417,13 @@ typedef struct { int cursor_fg; int cursor_bg; +#ifdef USE_XAA /* * XAAForceTransBlit is used to change the behavior of the XAA * SetupForScreenToScreenCopy function, to make it DGA-friendly. */ Bool XAAForceTransBlit; +#endif int fifo_slots; /* Free slots in the FIFO (64 max) */ int pix24bpp; /* Depth of pixmap for 24bpp fb */ @@ -416,6 +440,7 @@ typedef struct { int xdir; int ydir; +#ifdef USE_XAA /* ScanlineScreenToScreenColorExpand support */ unsigned char *scratch_buffer[1]; unsigned char *scratch_save; @@ -432,7 +457,7 @@ typedef struct { int scanline_hpass; int scanline_x1clip; int scanline_x2clip; - +#endif /* Saved values for DashedTwoPointLine */ int dashLen; CARD32 dashPattern; @@ -539,6 +564,7 @@ typedef struct { int textureSize; int log2TexGran; +#ifdef USE_XAA CARD32 frontPitchOffset; CARD32 backPitchOffset; CARD32 depthPitchOffset; @@ -551,6 +577,7 @@ typedef struct { FBAreaPtr backArea; int depthTexLines; FBAreaPtr depthTexArea; +#endif /* Saved scissor values */ CARD32 sc_left; @@ -580,7 +607,6 @@ typedef struct { /* XVideo */ XF86VideoAdaptorPtr adaptor; void (*VideoTimerCallback)(ScrnInfoPtr, Time); - FBLinearPtr videoLinear; int videoKey; int RageTheatreCrystal; int RageTheatreTunerPort; @@ -603,19 +629,31 @@ typedef struct { CARD8 input[5]; } MM_TABLE; CARD16 video_decoder_type; - /* Render */ + + /* Render */ Bool RenderAccel; - Bool RenderInited3D; +#ifdef USE_XAA FBLinearPtr RenderTex; void (*RenderCallback)(ScrnInfoPtr); Time RenderTimeout; +#endif - /* general */ + /* general */ Bool showCache; OptionInfoPtr Options; + + Bool useEXA; #ifdef XFree86LOADER +#ifdef USE_EXA + XF86ModReqInfo exaReq; +#endif +#ifdef USE_XAA XF86ModReqInfo xaaReq; #endif +#endif + + /* X itself has the 3D context */ + Bool XInited3D; /* merged fb stuff, also covers clone modes */ Bool MergedFB; @@ -692,11 +730,22 @@ extern void RADEONWaitForVerticalSync2(ScrnInfoPtr pScrn); extern void 
RADEONChangeSurfaces(ScrnInfoPtr pScrn); extern Bool RADEONAccelInit(ScreenPtr pScreen); +#ifdef USE_EXA +extern Bool RADEONSetupMemEXA (ScreenPtr pScreen); +extern Bool RADEONDrawInitMMIO(ScreenPtr pScreen); +#ifdef XF86DRI +extern Bool RADEONDrawInitCP(ScreenPtr pScreen); +#endif +#endif +#ifdef USE_XAA extern void RADEONAccelInitMMIO(ScreenPtr pScreen, XAAInfoRecPtr a); +#endif extern void RADEONEngineInit(ScrnInfoPtr pScrn); extern Bool RADEONCursorInit(ScreenPtr pScreen); extern Bool RADEONDGAInit(ScreenPtr pScreen); +extern void RADEONInit3DEngine(ScrnInfoPtr pScrn); + extern int RADEONMinBits(int val); extern void RADEONInitVideo(ScreenPtr pScreen); @@ -707,7 +756,9 @@ extern void RADEONPllErrataAfterIndex(RADEONInfoPtr info); extern void RADEONPllErrataAfterData(RADEONInfoPtr info); #ifdef XF86DRI +#ifdef USE_XAA extern void RADEONAccelInitCP(ScreenPtr pScreen, XAAInfoRecPtr a); +#endif extern Bool RADEONDRIScreenInit(ScreenPtr pScreen); extern void RADEONDRICloseScreen(ScreenPtr pScreen); extern void RADEONDRIResume(ScreenPtr pScreen); @@ -723,10 +774,14 @@ extern CARD8* RADEONHostDataBlit(ScrnInfoPtr pScrn, unsigned int bpp, unsigned int w, CARD32 dstPitch, CARD32 *bufPitch, CARD8 **dst, unsigned int *h, unsigned int *hpass); -extern void RADEONHostDataBlitCopyPass(CARD8 *dst, CARD8 *src, +extern void RADEONHostDataBlitCopyPass(ScrnInfoPtr pScrn, + unsigned int bpp, + CARD8 *dst, CARD8 *src, unsigned int hpass, unsigned int dstPitch, unsigned int srcPitch); +extern void RADEONCopySwap(CARD8 *dst, CARD8 *src, unsigned int size, + int swap); extern Bool RADEONGetBIOSInfo(ScrnInfoPtr pScrn, xf86Int10InfoPtr pInt10); extern Bool RADEONGetConnectorInfoFromBIOS (ScrnInfoPtr pScrn); @@ -907,19 +962,43 @@ do { \ #define RADEON_FLUSH_CACHE() \ do { \ BEGIN_RING(2); \ - OUT_RING(CP_PACKET0(RADEON_RB2D_DSTCACHE_CTLSTAT, 0)); \ - OUT_RING(RADEON_RB2D_DC_FLUSH); \ + OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \ + OUT_RING(RADEON_RB3D_DC_FLUSH); \ 
ADVANCE_RING(); \ } while (0) #define RADEON_PURGE_CACHE() \ do { \ BEGIN_RING(2); \ - OUT_RING(CP_PACKET0(RADEON_RB2D_DSTCACHE_CTLSTAT, 0)); \ - OUT_RING(RADEON_RB2D_DC_FLUSH_ALL); \ + OUT_RING(CP_PACKET0(RADEON_RB3D_DSTCACHE_CTLSTAT, 0)); \ + OUT_RING(RADEON_RB3D_DC_FLUSH_ALL); \ ADVANCE_RING(); \ } while (0) #endif /* XF86DRI */ +static __inline__ void RADEON_MARK_SYNC(RADEONInfoPtr info, ScrnInfoPtr pScrn) +{ +#ifdef USE_EXA + if (info->useEXA) + exaMarkSync(pScrn->pScreen); +#endif +#ifdef USE_XAA + if (!info->useEXA) + SET_SYNC_FLAG(info->accel); +#endif +} + +static __inline__ void RADEON_SYNC(RADEONInfoPtr info, ScrnInfoPtr pScrn) +{ +#ifdef USE_EXA + if (info->useEXA) + exaWaitSync(pScrn->pScreen); +#endif +#ifdef USE_XAA + if (!info->useEXA && info->accel) + info->accel->Sync(pScrn); +#endif +} + #endif /* _RADEON_H_ */ diff --git a/src/radeon_accel.c b/src/radeon_accel.c index d89568da..5cdabcbd 100644 --- a/src/radeon_accel.c +++ b/src/radeon_accel.c @@ -93,6 +93,7 @@ /* X and server generic header files */ #include "xf86.h" + static struct { int rop; int pattern; @@ -160,6 +161,10 @@ void RADEONEngineFlush(ScrnInfoPtr pScrn) if (!(INREG(RADEON_RB2D_DSTCACHE_CTLSTAT) & RADEON_RB2D_DC_BUSY)) break; } + if (i == RADEON_TIMEOUT) { + RADEONTRACE(("DC flush timeout: %x\n", + INREG(RADEON_RB2D_DSTCACHE_CTLSTAT))); + } } /* Reset graphics card to known state */ @@ -342,9 +347,7 @@ void RADEONEngineRestore(ScrnInfoPtr pScrn) RADEONWaitForIdleMMIO(pScrn); -#ifdef RENDER - info->RenderInited3D = FALSE; -#endif + info->XInited3D = FALSE; } /* Initialize the acceleration hardware */ @@ -399,13 +402,15 @@ void RADEONEngineInit(ScrnInfoPtr pScrn) RADEONEngineRestore(pScrn); } + #define ACCEL_MMIO #define ACCEL_PREAMBLE() unsigned char *RADEONMMIO = info->MMIO #define BEGIN_ACCEL(n) RADEONWaitForFifo(pScrn, (n)) #define OUT_ACCEL_REG(reg, val) OUTREG(reg, val) #define FINISH_ACCEL() -#ifdef RENDER +#include "radeon_commonfuncs.c" +#if defined(RENDER) && 
defined(USE_XAA) #include "radeon_render.c" #endif #include "radeon_accelfuncs.c" @@ -426,7 +431,9 @@ void RADEONEngineInit(ScrnInfoPtr pScrn) #define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val) #define FINISH_ACCEL() ADVANCE_RING() -#ifdef RENDER + +#include "radeon_commonfuncs.c" +#if defined(RENDER) && defined(USE_XAA) #include "radeon_render.c" #endif #include "radeon_accelfuncs.c" @@ -656,14 +663,22 @@ RADEONHostDataBlit( } #if X_BYTE_ORDER == X_BIG_ENDIAN - BEGIN_RING(2); - if (bpp == 2) - OUT_RING_REG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_16BIT); - else if (bpp == 1) - OUT_RING_REG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT); - else - OUT_RING_REG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_NONE); - ADVANCE_RING(); + /* Swap doesn't work on R300 and later, it's handled during the + * copy to ind. buffer pass + */ + if (info->ChipFamily < CHIP_FAMILY_R300) { + BEGIN_RING(2); + if (bpp == 2) + OUT_RING_REG(RADEON_RBBM_GUICNTL, + RADEON_HOST_DATA_SWAP_HDW); + else if (bpp == 1) + OUT_RING_REG(RADEON_RBBM_GUICNTL, + RADEON_HOST_DATA_SWAP_32BIT); + else + OUT_RING_REG(RADEON_RBBM_GUICNTL, + RADEON_HOST_DATA_SWAP_NONE); + ADVANCE_RING(); + } #endif /*RADEON_PURGE_CACHE(); @@ -704,11 +719,61 @@ RADEONHostDataBlit( return ret; } +void RADEONCopySwap(CARD8 *dst, CARD8 *src, unsigned int size, int swap) +{ + switch(swap) { + case RADEON_HOST_DATA_SWAP_HDW: + { + unsigned int *d = (unsigned int *)dst; + unsigned int *s = (unsigned int *)src; + unsigned int nwords = size >> 2; + + for (; nwords > 0; --nwords, ++d, ++s) + *d = ((*s & 0xffff) << 16) | ((*s >> 16) & 0xffff); + return; + } + case RADEON_HOST_DATA_SWAP_32BIT: + { + unsigned int *d = (unsigned int *)dst; + unsigned int *s = (unsigned int *)src; + unsigned int nwords = size >> 2; + + for (; nwords > 0; --nwords, ++d, ++s) +#ifdef __powerpc__ + asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d)); +#else + *d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00) + | ((*s & 0xff00) << 8) | ((*s & 0xff) << 
24); +#endif + return; + } + case RADEON_HOST_DATA_SWAP_16BIT: + { + unsigned short *d = (unsigned short *)dst; + unsigned short *s = (unsigned short *)src; + unsigned int nwords = size >> 1; + + for (; nwords > 0; --nwords, ++d, ++s) +#ifdef __powerpc__ + asm volatile("stwbrx %0,0,%1" : : "r" (*s), "r" (d)); +#else + *d = ((*s >> 24) & 0xff) | ((*s >> 8) & 0xff00) + | ((*s & 0xff00) << 8) | ((*s & 0xff) << 24); +#endif + return; + } + } + if (src != dst) + memmove(dst, src, size); +} + /* Copies a single pass worth of data for a hostdata blit set up by * RADEONHostDataBlit(). */ void RADEONHostDataBlitCopyPass( + ScrnInfoPtr pScrn, + unsigned int bpp, CARD8 *dst, CARD8 *src, unsigned int hpass, @@ -716,11 +781,27 @@ RADEONHostDataBlitCopyPass( unsigned int srcPitch ){ + RADEONInfoPtr info = RADEONPTR( pScrn ); + /* RADEONHostDataBlitCopy can return NULL ! */ if( (dst==NULL) || (src==NULL)) return; if ( dstPitch == srcPitch ) { +#if X_BYTE_ORDER == X_BIG_ENDIAN + if (info->ChipFamily >= CHIP_FAMILY_R300) { + switch(bpp) { + case 1: + RADEONCopySwap(dst, src, hpass * dstPitch, + RADEON_HOST_DATA_SWAP_32BIT); + return; + case 2: + RADEONCopySwap(dst, src, hpass * dstPitch, + RADEON_HOST_DATA_SWAP_HDW); + return; + } + } +#endif memcpy( dst, src, hpass * dstPitch ); } else @@ -728,7 +809,22 @@ RADEONHostDataBlitCopyPass( unsigned int minPitch = min( dstPitch, srcPitch ); while ( hpass-- ) { +#if X_BYTE_ORDER == X_BIG_ENDIAN + if (info->ChipFamily >= CHIP_FAMILY_R300) { + switch(bpp) { + case 1: + RADEONCopySwap(dst, src, minPitch, + RADEON_HOST_DATA_SWAP_32BIT); + goto next; + case 2: + RADEONCopySwap(dst, src, minPitch, + RADEON_HOST_DATA_SWAP_HDW); + goto next; + } + } +#endif memcpy( dst, src, minPitch ); + next: src += srcPitch; dst += dstPitch; } @@ -737,33 +833,67 @@ RADEONHostDataBlitCopyPass( #endif -/* Initialize XAA for supported acceleration and also initialize the - * graphics hardware for acceleration - */ Bool RADEONAccelInit(ScreenPtr pScreen) { 
ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); - XAAInfoRecPtr a; - if (!(a = info->accel = XAACreateInfoRec())) { - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n"); - return FALSE; +#ifdef USE_EXA + if (info->useEXA) { +# ifdef XF86DRI + if (info->directRenderingEnabled) { + if (!RADEONDrawInitCP(pScreen)) + return FALSE; + } else +# endif /* XF86DRI */ + { + if (!RADEONDrawInitMMIO(pScreen)) + return FALSE; + } } +#endif /* USE_EXA */ +#ifdef USE_XAA + if (!info->useEXA) { + XAAInfoRecPtr a; + + if (!(a = info->accel = XAACreateInfoRec())) { + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAACreateInfoRec Error\n"); + return FALSE; + } #ifdef XF86DRI - if (info->directRenderingEnabled) - RADEONAccelInitCP(pScreen, a); - else -#endif - RADEONAccelInitMMIO(pScreen, a); + if (info->directRenderingEnabled) + RADEONAccelInitCP(pScreen, a); + else +#endif /* XF86DRI */ + RADEONAccelInitMMIO(pScreen, a); - RADEONEngineInit(pScrn); + RADEONEngineInit(pScrn); - if (!XAAInit(pScreen, a)) { - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n"); - return FALSE; + if (!XAAInit(pScreen, a)) { + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, "XAAInit Error\n"); + return FALSE; + } } - +#endif /* USE_XAA */ return TRUE; } + +void RADEONInit3DEngine(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR (pScrn); + +#ifdef XF86DRI + if (info->directRenderingEnabled) { + RADEONSAREAPrivPtr pSAREAPriv; + + pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen); + pSAREAPriv->ctxOwner = DRIGetContext(pScrn->pScreen); + RADEONInit3DEngineCP(pScrn); + } else +#endif + RADEONInit3DEngineMMIO(pScrn); + + info->XInited3D = TRUE; +} + diff --git a/src/radeon_accelfuncs.c b/src/radeon_accelfuncs.c index c31e2748..25d97de4 100644 --- a/src/radeon_accelfuncs.c +++ b/src/radeon_accelfuncs.c @@ -95,89 +95,14 @@ #endif #endif -/* MMIO: - * - * Wait for the graphics engine to be completely idle: the FIFO has - * drained, the Pixel Cache is flushed, and the 
engine is idle. This is - * a standard "sync" function that will make the hardware "quiescent". - * - * CP: - * - * Wait until the CP is completely idle: the FIFO has drained and the CP - * is idle. - */ -void -FUNC_NAME(RADEONWaitForIdle)(ScrnInfoPtr pScrn) -{ - RADEONInfoPtr info = RADEONPTR(pScrn); - unsigned char *RADEONMMIO = info->MMIO; - int i = 0; - -#ifdef ACCEL_CP - /* Make sure the CP is idle first */ - if (info->CPStarted) { - int ret; - FLUSH_RING(); - - for (;;) { - do { - ret = drmCommandNone(info->drmFD, DRM_RADEON_CP_IDLE); - if (ret && ret != -EBUSY) { - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, - "%s: CP idle %d\n", __FUNCTION__, ret); - } - } while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT)); - - if (ret == 0) return; - - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, - "Idle timed out, resetting engine...\n"); - RADEONEngineReset(pScrn); - RADEONEngineRestore(pScrn); - - /* Always restart the engine when doing CP 2D acceleration */ - RADEONCP_RESET(pScrn, info); - RADEONCP_START(pScrn, info); - } - } -#endif - - RADEONTRACE(("WaitForIdle (entering): %d entries, stat=0x%08x\n", - INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK, - INREG(RADEON_RBBM_STATUS))); - - /* Wait for the engine to go idle */ - RADEONWaitForFifoFunction(pScrn, 64); - - for (;;) { - for (i = 0; i < RADEON_TIMEOUT; i++) { - if (!(INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)) { - RADEONEngineFlush(pScrn); - return; - } - } - RADEONTRACE(("Idle timed out: %d entries, stat=0x%08x\n", - INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK, - INREG(RADEON_RBBM_STATUS))); - xf86DrvMsg(pScrn->scrnIndex, X_ERROR, - "Idle timed out, resetting engine...\n"); - RADEONEngineReset(pScrn); - RADEONEngineRestore(pScrn); -#ifdef XF86DRI - if (info->directRenderingEnabled) { - RADEONCP_RESET(pScrn, info); - RADEONCP_START(pScrn, info); - } -#endif - } -} +#ifdef USE_XAA /* This callback is required for multiheader cards using XAA */ static void FUNC_NAME(RADEONRestoreAccelState)(ScrnInfoPtr 
pScrn) { - RADEONInfoPtr info = RADEONPTR(pScrn); - unsigned char *RADEONMMIO = info->MMIO; + /*RADEONInfoPtr info = RADEONPTR(pScrn); + unsigned char *RADEONMMIO = info->MMIO;*/ #ifdef ACCEL_MMIO @@ -775,6 +700,8 @@ FUNC_NAME(RADEONSetupForScanlineCPUToScreenColorExpandFill)(ScrnInfoPtr pScrn, RADEONInfoPtr info = RADEONPTR(pScrn); ACCEL_PREAMBLE(); + info->scanline_bpp = 0; + /* Save for later clipping */ info->dp_gui_master_cntl_clip = (info->dp_gui_master_cntl | RADEON_GMC_DST_CLIPPING @@ -812,9 +739,12 @@ FUNC_NAME(RADEONSetupForScanlineCPUToScreenColorExpandFill)(ScrnInfoPtr pScrn, #if X_BYTE_ORDER == X_LITTLE_ENDIAN BEGIN_ACCEL(1); #else - BEGIN_ACCEL(2); + if (info->ChipFamily < CHIP_FAMILY_R300) { + BEGIN_ACCEL(2); - OUT_ACCEL_REG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT); + OUT_ACCEL_REG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_32BIT); + } else + BEGIN_ACCEL(1); #endif OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, planemask); @@ -939,6 +869,22 @@ FUNC_NAME(RADEONSubsequentScanline)(ScrnInfoPtr pScrn, #else /* ACCEL_CP */ +#if X_BYTE_ORDER == X_BIG_ENDIAN + if (info->ChipFamily >= CHIP_FAMILY_R300) { + if (info->scanline_bpp == 16) { + RADEONCopySwap(info->scratch_buffer[bufno], + info->scratch_buffer[bufno], + info->scanline_words << 2, + RADEON_HOST_DATA_SWAP_HDW); + } else if (info->scanline_bpp < 15) { + RADEONCopySwap(info->scratch_buffer[bufno], + info->scratch_buffer[bufno], + info->scanline_words << 2, + RADEON_HOST_DATA_SWAP_32BIT); + } + } +#endif + if (--info->scanline_hpass) { info->scratch_buffer[bufno] += 4 * info->scanline_words; } else if (info->scanline_h) { @@ -994,12 +940,15 @@ FUNC_NAME(RADEONSetupForScanlineImageWrite)(ScrnInfoPtr pScrn, #if X_BYTE_ORDER == X_LITTLE_ENDIAN BEGIN_ACCEL(1); #else - BEGIN_ACCEL(2); - - if (bpp == 16) - OUT_ACCEL_REG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_HDW); - else - OUT_ACCEL_REG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_NONE); + if (info->ChipFamily < CHIP_FAMILY_R300) { + BEGIN_ACCEL(2); + + if 
(bpp == 16) + OUT_ACCEL_REG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_HDW); + else + OUT_ACCEL_REG(RADEON_RBBM_GUICNTL, RADEON_HOST_DATA_SWAP_NONE); + } else + BEGIN_ACCEL(1); #endif #endif OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, planemask); @@ -1225,7 +1174,8 @@ FUNC_NAME(RADEONAccelInit)(ScreenPtr pScreen, XAAInfoRecPtr a) = FUNC_NAME(RADEONSetupForScanlineCPUToScreenColorExpandFill); a->SubsequentScanlineCPUToScreenColorExpandFill = FUNC_NAME(RADEONSubsequentScanlineCPUToScreenColorExpandFill); - a->SubsequentColorExpandScanline = FUNC_NAME(RADEONSubsequentScanline); + a->SubsequentColorExpandScanline + = FUNC_NAME(RADEONSubsequentScanline); /* Solid Lines */ a->SetupForSolidLine @@ -1394,4 +1344,6 @@ FUNC_NAME(RADEONAccelInit)(ScreenPtr pScreen, XAAInfoRecPtr a) #endif /* RENDER */ } +#endif /* USE_XAA */ + #undef FUNC_NAME diff --git a/src/radeon_commonfuncs.c b/src/radeon_commonfuncs.c new file mode 100644 index 00000000..a71bf3a0 --- /dev/null +++ b/src/radeon_commonfuncs.c @@ -0,0 +1,186 @@ +/* $XFree86: xc/programs/Xserver/hw/xfree86/drivers/ati/radeon_accelfuncs.c,v 1.7tsi Exp $ */ +/* + * Copyright 2000 ATI Technologies Inc., Markham, Ontario, and + * VA Linux Systems Inc., Fremont, California. + * + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining + * a copy of this software and associated documentation files (the + * "Software"), to deal in the Software without restriction, including + * without limitation on the rights to use, copy, modify, merge, + * publish, distribute, sublicense, and/or sell copies of the Software, + * and to permit persons to whom the Software is furnished to do so, + * subject to the following conditions: + * + * The above copyright notice and this permission notice (including the + * next paragraph) shall be included in all copies or substantial + * portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NON-INFRINGEMENT. IN NO EVENT SHALL ATI, VA LINUX SYSTEMS AND/OR + * THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, + * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#if defined(ACCEL_MMIO) && defined(ACCEL_CP) +#error Cannot define both MMIO and CP acceleration! +#endif + +#if !defined(UNIXCPP) || defined(ANSICPP) +#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix +#else +#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix +#endif + +#ifdef ACCEL_MMIO +#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO) +#else +#ifdef ACCEL_CP +#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP) +#else +#error No accel type defined! +#endif +#endif + +static void FUNC_NAME(RADEONInit3DEngine)(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + ACCEL_PREAMBLE(); + + if (info->ChipFamily >= CHIP_FAMILY_R300) { + /* Unimplemented */ + } else if ((info->ChipFamily == CHIP_FAMILY_RV250) || + (info->ChipFamily == CHIP_FAMILY_RV280) || + (info->ChipFamily == CHIP_FAMILY_RS300) || + (info->ChipFamily == CHIP_FAMILY_R200)) { + + BEGIN_ACCEL(7); + if (info->ChipFamily == CHIP_FAMILY_RS300) { + OUT_ACCEL_REG(R200_SE_VAP_CNTL_STATUS, RADEON_TCL_BYPASS); + } else { + OUT_ACCEL_REG(R200_SE_VAP_CNTL_STATUS, 0); + } + OUT_ACCEL_REG(R200_PP_CNTL_X, 0); + OUT_ACCEL_REG(R200_PP_TXMULTI_CTL_0, 0); + OUT_ACCEL_REG(R200_SE_VTX_STATE_CNTL, 0); + OUT_ACCEL_REG(R200_RE_CNTL, 0x0); + /* XXX: correct? 
Want it to be like RADEON_VTX_ST?_NONPARAMETRIC */ + OUT_ACCEL_REG(R200_SE_VTE_CNTL, R200_VTX_ST_DENORMALIZED); + OUT_ACCEL_REG(R200_SE_VAP_CNTL, R200_VAP_FORCE_W_TO_ONE | + R200_VAP_VF_MAX_VTX_NUM); + FINISH_ACCEL(); + } else { + BEGIN_ACCEL(2); + if ((info->ChipFamily == CHIP_FAMILY_RADEON) || + (info->ChipFamily == CHIP_FAMILY_RV200)) + OUT_ACCEL_REG(RADEON_SE_CNTL_STATUS, 0); + else + OUT_ACCEL_REG(RADEON_SE_CNTL_STATUS, RADEON_TCL_BYPASS); + OUT_ACCEL_REG(RADEON_SE_COORD_FMT, + RADEON_VTX_XY_PRE_MULT_1_OVER_W0 | + RADEON_VTX_ST0_NONPARAMETRIC | + RADEON_VTX_ST1_NONPARAMETRIC | + RADEON_TEX1_W_ROUTING_USE_W0); + FINISH_ACCEL(); + } + + BEGIN_ACCEL(5); + OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); + OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, 0x07ff07ff); + OUT_ACCEL_REG(RADEON_AUX_SC_CNTL, 0); + OUT_ACCEL_REG(RADEON_RB3D_PLANEMASK, 0xffffffff); + OUT_ACCEL_REG(RADEON_SE_CNTL, RADEON_DIFFUSE_SHADE_GOURAUD | + RADEON_BFACE_SOLID | + RADEON_FFACE_SOLID | + RADEON_VTX_PIX_CENTER_OGL | + RADEON_ROUND_MODE_ROUND | + RADEON_ROUND_PREC_4TH_PIX); + FINISH_ACCEL(); +} + + +/* MMIO: + * + * Wait for the graphics engine to be completely idle: the FIFO has + * drained, the Pixel Cache is flushed, and the engine is idle. This is + * a standard "sync" function that will make the hardware "quiescent". + * + * CP: + * + * Wait until the CP is completely idle: the FIFO has drained and the CP + * is idle. 
+ */ +void FUNC_NAME(RADEONWaitForIdle)(ScrnInfoPtr pScrn) +{ + RADEONInfoPtr info = RADEONPTR(pScrn); + unsigned char *RADEONMMIO = info->MMIO; + int i = 0; + +#ifdef ACCEL_CP + /* Make sure the CP is idle first */ + if (info->CPStarted) { + int ret; + + FLUSH_RING(); + + for (;;) { + do { + ret = drmCommandNone(info->drmFD, DRM_RADEON_CP_IDLE); + if (ret && ret != -EBUSY) { + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, + "%s: CP idle %d\n", __FUNCTION__, ret); + } + } while ((ret == -EBUSY) && (i++ < RADEON_TIMEOUT)); + + if (ret == 0) return; + + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, + "Idle timed out, resetting engine...\n"); + RADEONEngineReset(pScrn); + RADEONEngineRestore(pScrn); + + /* Always restart the engine when doing CP 2D acceleration */ + RADEONCP_RESET(pScrn, info); + RADEONCP_START(pScrn, info); + } + } +#endif + + RADEONTRACE(("WaitForIdle (entering): %d entries, stat=0x%08x\n", + INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK, + INREG(RADEON_RBBM_STATUS))); + + /* Wait for the engine to go idle */ + RADEONWaitForFifoFunction(pScrn, 64); + + for (;;) { + for (i = 0; i < RADEON_TIMEOUT; i++) { + if (!(INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_ACTIVE)) { + RADEONEngineFlush(pScrn); + return; + } + } + RADEONTRACE(("Idle timed out: %d entries, stat=0x%08x\n", + INREG(RADEON_RBBM_STATUS) & RADEON_RBBM_FIFOCNT_MASK, + INREG(RADEON_RBBM_STATUS))); + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, + "Idle timed out, resetting engine...\n"); + RADEONEngineReset(pScrn); + RADEONEngineRestore(pScrn); +#ifdef XF86DRI + if (info->directRenderingEnabled) { + RADEONCP_RESET(pScrn, info); + RADEONCP_START(pScrn, info); + } +#endif + } +} diff --git a/src/radeon_cursor.c b/src/radeon_cursor.c index c08550d1..fc249ffe 100644 --- a/src/radeon_cursor.c +++ b/src/radeon_cursor.c @@ -68,9 +68,7 @@ static CARD32 mono_cursor_color[] = { #define CURSOR_WIDTH 64 #define CURSOR_HEIGHT 64 -#define COMMON_CURSOR_SWAPPING_START() \ - if (info->accel && info->accel->Sync) \ - 
info->accel->Sync(pScrn); +#define COMMON_CURSOR_SWAPPING_START() RADEON_SYNC(info, pScrn) /* * The cursor bits are always 32bpp. On MSBFirst buses, @@ -108,7 +106,7 @@ static CARD32 mono_cursor_color[] = { static void RADEONSetCursorColors(ScrnInfoPtr pScrn, int bg, int fg) { RADEONInfoPtr info = RADEONPTR(pScrn); - CARD32 *pixels = (CARD32 *)(pointer)(info->FB + info->cursor_start); + CARD32 *pixels = (CARD32 *)(pointer)(info->FB + info->cursor_offset); int pixel, i; CURSOR_SWAPPING_DECL_MMIO @@ -173,7 +171,7 @@ static void RADEONSetCursorPosition(ScrnInfoPtr pScrn, int x, int y) OUTREG(RADEON_CUR_HORZ_VERT_POSN, (RADEON_CUR_LOCK | ((xorigin ? 0 : x) << 16) | (yorigin ? 0 : y))); - OUTREG(RADEON_CUR_OFFSET, info->cursor_start + yorigin * stride); + OUTREG(RADEON_CUR_OFFSET, info->cursor_offset + yorigin * stride); } else { OUTREG(RADEON_CUR2_HORZ_VERT_OFF, (RADEON_CUR2_LOCK | (xorigin << 16) @@ -182,7 +180,7 @@ static void RADEONSetCursorPosition(ScrnInfoPtr pScrn, int x, int y) | ((xorigin ? 0 : x) << 16) | (yorigin ? 0 : y))); OUTREG(RADEON_CUR2_OFFSET, - info->cursor_start + pScrn->fbOffset + yorigin * stride); + info->cursor_offset + pScrn->fbOffset + yorigin * stride); } } @@ -195,7 +193,7 @@ static void RADEONLoadCursorImage(ScrnInfoPtr pScrn, unsigned char *image) RADEONInfoPtr info = RADEONPTR(pScrn); unsigned char *RADEONMMIO = info->MMIO; CARD8 *s = (CARD8 *)(pointer)image; - CARD32 *d = (CARD32 *)(pointer)(info->FB + info->cursor_start); + CARD32 *d = (CARD32 *)(pointer)(info->FB + info->cursor_offset); CARD32 save1 = 0; CARD32 save2 = 0; CARD8 chunk; @@ -279,7 +277,7 @@ static Bool RADEONUseHWCursor(ScreenPtr pScreen, CursorPtr pCurs) ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); - return info->cursor_start ? TRUE : FALSE; + return info->cursor_offset ? 
TRUE : FALSE; } #ifdef ARGB_CURSOR @@ -290,7 +288,7 @@ static Bool RADEONUseHWCursorARGB (ScreenPtr pScreen, CursorPtr pCurs) ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); - if (info->cursor_start && + if (info->cursor_offset && pCurs->bits->height <= CURSOR_HEIGHT && pCurs->bits->width <= CURSOR_WIDTH) return TRUE; return FALSE; @@ -300,7 +298,7 @@ static void RADEONLoadCursorARGB (ScrnInfoPtr pScrn, CursorPtr pCurs) { RADEONInfoPtr info = RADEONPTR(pScrn); unsigned char *RADEONMMIO = info->MMIO; - CARD32 *d = (CARD32 *)(pointer)(info->FB + info->cursor_start); + CARD32 *d = (CARD32 *)(pointer)(info->FB + info->cursor_offset); int x, y, w, h; CARD32 save1 = 0; CARD32 save2 = 0; @@ -361,6 +359,18 @@ static void RADEONLoadCursorARGB (ScrnInfoPtr pScrn, CursorPtr pCurs) #endif +#ifdef USE_EXA +static void +ATICursorSave(ScreenPtr pScreen, ExaOffscreenArea *area) +{ + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + + info->cursorArea = NULL; + info->cursor_offset = 0; +} +#endif + /* Initialize hardware cursor support. 
*/ Bool RADEONCursorInit(ScreenPtr pScreen) @@ -368,7 +378,6 @@ Bool RADEONCursorInit(ScreenPtr pScreen) ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); xf86CursorInfoPtr cursor; - FBAreaPtr fbarea; int width; int width_bytes; int height; @@ -405,29 +414,49 @@ Bool RADEONCursorInit(ScreenPtr pScreen) width = pScrn->displayWidth; width_bytes = width * (pScrn->bitsPerPixel / 8); height = (size_bytes + width_bytes - 1) / width_bytes; - fbarea = xf86AllocateOffscreenArea(pScreen, - width, - height, - 256, - NULL, - NULL, - NULL); - - if (!fbarea) { - info->cursor_start = 0; - xf86DrvMsg(pScrn->scrnIndex, X_WARNING, + +#ifdef USE_EXA + if (info->useEXA) { + info->cursorArea = exaOffscreenAlloc(pScreen, size_bytes, + 128, TRUE, ATICursorSave, info); + + if (!info->cursorArea) { + xf86DrvMsg(pScrn->scrnIndex, X_WARNING, + "Hardware cursor disabled" + " due to insufficient offscreen memory\n"); + info->cursor_offset = 0; + } else { + info->cursor_offset = info->cursorArea->offset; + } + + RADEONTRACE(("RADEONCursorInit (0x%08x-0x%08x)\n", + info->cursor_offset, + info->cursor_offset + info->cursorArea->size)); + } +#endif /* USE_EXA */ +#ifdef USE_XAA + if (!info->useEXA) { + FBAreaPtr fbarea; + + fbarea = xf86AllocateOffscreenArea(pScreen, width, height, + 256, NULL, NULL, NULL); + + if (!fbarea) { + info->cursor_offset = 0; + xf86DrvMsg(pScrn->scrnIndex, X_WARNING, "Hardware cursor disabled" " due to insufficient offscreen memory\n"); - } else { - info->cursor_start = RADEON_ALIGN((fbarea->box.x1 + - fbarea->box.y1 * width) * - info->CurrentLayout.pixel_bytes, - 256); - info->cursor_end = info->cursor_start + size_bytes; + } else { + info->cursor_offset = RADEON_ALIGN((fbarea->box.x1 + + fbarea->box.y1 * width) * + info->CurrentLayout.pixel_bytes, + 256); + info->cursor_end = info->cursor_offset + size_bytes; + } + RADEONTRACE(("RADEONCursorInit (0x%08x-0x%08x)\n", + info->cursor_offset, info->cursor_end)); } - - 
RADEONTRACE(("RADEONCursorInit (0x%08x-0x%08x)\n", - info->cursor_start, info->cursor_end)); +#endif return xf86InitCursor(pScreen, cursor); } diff --git a/src/radeon_dga.c b/src/radeon_dga.c index 9c70d780..06fc62e9 100644 --- a/src/radeon_dga.c +++ b/src/radeon_dga.c @@ -61,9 +61,10 @@ static int RADEON_GetViewport(ScrnInfoPtr); static void RADEON_SetViewport(ScrnInfoPtr, int, int, int); static void RADEON_FillRect(ScrnInfoPtr, int, int, int, int, unsigned long); static void RADEON_BlitRect(ScrnInfoPtr, int, int, int, int, int, int); +#ifdef USE_XAA static void RADEON_BlitTransRect(ScrnInfoPtr, int, int, int, int, int, int, unsigned long); - +#endif static DGAModePtr RADEONSetupDGAMode(ScrnInfoPtr pScrn, DGAModePtr modes, @@ -112,7 +113,19 @@ SECOND_PASS: if (pixmap) currentMode->flags |= DGA_PIXMAP_AVAILABLE; - if (info->accel) { +#ifdef USE_EXA + if (info->useEXA) { + /* We need to fill in RADEON_FillRect and RADEON_BlitRect and + * connect them in RADEONDGAInit before turning these on. 
+ */ + /*if (info->exa.accel.PrepareSolid && info->exa.accel.Solid) + currentMode->flags |= DGA_FILL_RECT; + if (info->exa.accel.PrepareCopy && info->exa.accel.Copy) + currentMode->flags |= DGA_BLIT_RECT | DGA_BLIT_RECT_TRANS;*/ + } +#endif /* USE_EXA */ +#ifdef USE_XAA + if (!info->useEXA && info->accel) { if (info->accel->SetupForSolidFill && info->accel->SubsequentSolidFillRect) currentMode->flags |= DGA_FILL_RECT; @@ -124,6 +137,8 @@ SECOND_PASS: DGA_BLIT_RECT | DGA_BLIT_RECT_TRANS)) currentMode->flags &= ~DGA_CONCURRENT_ACCESS; } +#endif /* USE_XAA */ + if (pMode->Flags & V_DBLSCAN) currentMode->flags |= DGA_DOUBLESCAN; if (pMode->Flags & V_INTERLACE) @@ -237,7 +252,19 @@ Bool RADEONDGAInit(ScreenPtr pScreen) info->DGAFuncs.BlitRect = NULL; info->DGAFuncs.BlitTransRect = NULL; - if (info->accel) { +#ifdef USE_EXA + /*info->DGAFuncs.Sync = info->exa.accel->Sync;*/ + if (info->useEXA) { + /*if (info->exa.accel.PrepareSolid && info->exa.accel.Solid) { + info->DGAFuncs.FillRect = RADEON_FillRect; + } + if (info->exa.accel.PrepareCopy && info->exa.accel.Copy) { + info->DGAFuncs.BlitRect = RADEON_BlitRect; + }*/ + } +#endif /* USE_EXA */ +#ifdef USE_XAA + if (!info->useEXA && info->accel) { info->DGAFuncs.Sync = info->accel->Sync; if (info->accel->SetupForSolidFill && info->accel->SubsequentSolidFillRect) @@ -248,6 +275,7 @@ Bool RADEONDGAInit(ScreenPtr pScreen) info->DGAFuncs.BlitTransRect = RADEON_BlitTransRect; } } +#endif /* USE_XAA */ return DGAInit(pScreen, &info->DGAFuncs, modes, num); } @@ -332,17 +360,33 @@ static void RADEON_SetViewport(ScrnInfoPtr pScrn, int x, int y, int flags) info->DGAViewportStatus = 0; /* FIXME */ } + static void RADEON_FillRect(ScrnInfoPtr pScrn, int x, int y, int w, int h, unsigned long color) { RADEONInfoPtr info = RADEONPTR(pScrn); - (*info->accel->SetupForSolidFill)(pScrn, color, GXcopy, (CARD32)(~0)); - (*info->accel->SubsequentSolidFillRect)(pScrn, x, y, w, h); +#ifdef USE_EXA + /* XXX */ + if (info->useEXA) { + /* + 
info->exa.accel.PrepareSolid(pScrn, color, GXcopy, (CARD32)(~0)); + info->exa.accel.Solid(pScrn, x, y, x+w, y+h); + info->exa.accel.DoneSolid(); + */ + RADEON_MARK_SYNC(info, pScrn); + } +#endif /* USE_EXA */ +#ifdef USE_XAA + if (!info->useEXA) { + (*info->accel->SetupForSolidFill)(pScrn, color, GXcopy, (CARD32)(~0)); + (*info->accel->SubsequentSolidFillRect)(pScrn, x, y, w, h); + if (pScrn->bitsPerPixel == info->CurrentLayout.bitsPerPixel) + RADEON_MARK_SYNC(info, pScrn); + } +#endif /* USE_XAA */ - if (pScrn->bitsPerPixel == info->CurrentLayout.bitsPerPixel) - SET_SYNC_FLAG(info->accel); } static void RADEON_BlitRect(ScrnInfoPtr pScrn, @@ -353,15 +397,30 @@ static void RADEON_BlitRect(ScrnInfoPtr pScrn, int xdir = ((srcx < dstx) && (srcy == dsty)) ? -1 : 1; int ydir = (srcy < dsty) ? -1 : 1; - (*info->accel->SetupForScreenToScreenCopy)(pScrn, xdir, ydir, - GXcopy, (CARD32)(~0), -1); - (*info->accel->SubsequentScreenToScreenCopy)(pScrn, srcx, srcy, - dstx, dsty, w, h); - - if (pScrn->bitsPerPixel == info->CurrentLayout.bitsPerPixel) - SET_SYNC_FLAG(info->accel); + #ifdef USE_EXA + /* XXX */ + if (info->useEXA) { + /* + info->exa.accel.PrepareCopy(pScrn, color, GXcopy, (CARD32)(~0)); + info->exa.accel.Copy(pScrn, srcx, srcy, dstx, dsty, w, h); + info->exa.accel.DoneCopy(); + */ + RADEON_MARK_SYNC(info, pScrn); + } +#endif /* USE_EXA */ +#ifdef USE_XAA + if (!info->useEXA) { + (*info->accel->SetupForScreenToScreenCopy)(pScrn, xdir, ydir, + GXcopy, (CARD32)(~0), -1); + (*info->accel->SubsequentScreenToScreenCopy)(pScrn, srcx, srcy, + dstx, dsty, w, h); + if (pScrn->bitsPerPixel == info->CurrentLayout.bitsPerPixel) + RADEON_MARK_SYNC(info, pScrn); + } +#endif /* USE_XAA */ } +#ifdef USE_XAA static void RADEON_BlitTransRect(ScrnInfoPtr pScrn, int srcx, int srcy, int w, int h, int dstx, int dsty, unsigned long color) @@ -371,7 +430,6 @@ static void RADEON_BlitTransRect(ScrnInfoPtr pScrn, int ydir = (srcy < dsty) ? 
-1 : 1; info->XAAForceTransBlit = TRUE; - (*info->accel->SetupForScreenToScreenCopy)(pScrn, xdir, ydir, GXcopy, (CARD32)(~0), color); @@ -381,8 +439,9 @@ static void RADEON_BlitTransRect(ScrnInfoPtr pScrn, dstx, dsty, w, h); if (pScrn->bitsPerPixel == info->CurrentLayout.bitsPerPixel) - SET_SYNC_FLAG(info->accel); + RADEON_MARK_SYNC(info, pScrn); } +#endif /* USE_XAA */ static Bool RADEON_OpenFramebuffer(ScrnInfoPtr pScrn, char **name, diff --git a/src/radeon_dri.c b/src/radeon_dri.c index 8e2fe447..892e5d52 100644 --- a/src/radeon_dri.c +++ b/src/radeon_dri.c @@ -42,6 +42,7 @@ /* Driver data structures */ #include "radeon.h" +#include "radeon_video.h" #include "radeon_reg.h" #include "radeon_macros.h" #include "radeon_dri.h" @@ -347,17 +348,15 @@ static void RADEONEnterServer(ScreenPtr pScreen) { ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); -#ifdef RENDER RADEONSAREAPrivPtr pSAREAPriv; -#endif - if (info->accel) info->accel->NeedToSync = TRUE; -#ifdef RENDER + RADEON_MARK_SYNC(info, pScrn); + pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen); if (pSAREAPriv->ctxOwner != DRIGetContext(pScrn->pScreen)) - info->RenderInited3D = FALSE; -#endif + info->XInited3D = FALSE; + /* TODO: Fix this more elegantly. * Sometimes (especially with multiple DRI clients), this code @@ -528,6 +527,7 @@ static void RADEONDRIInitBuffers(WindowPtr pWin, RegionPtr prgn, CARD32 indx) static void RADEONDRIMoveBuffers(WindowPtr pParent, DDXPointRec ptOldOrg, RegionPtr prgnSrc, CARD32 indx) { +#ifdef USE_XAA ScreenPtr pScreen = pParent->drawable.pScreen; ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); @@ -551,6 +551,10 @@ static void RADEONDRIMoveBuffers(WindowPtr pParent, DDXPointRec ptOldOrg, int dx = pParent->drawable.x - ptOldOrg.x; int dy = pParent->drawable.y - ptOldOrg.y; + /* XXX: Fix in EXA case. 
*/ + if (info->useEXA) + return; + /* If the copy will overlap in Y, reverse the order */ if (dy > 0) { ydir = -1; @@ -683,6 +687,7 @@ static void RADEONDRIMoveBuffers(WindowPtr pParent, DDXPointRec ptOldOrg, DEALLOCATE_LOCAL(pboxNew1); info->accel->NeedToSync = TRUE; +#endif /* USE_XAA */ } static void RADEONDRIInitGARTValues(RADEONInfoPtr info) @@ -1183,7 +1188,10 @@ static void RADEONDRICPInit(ScrnInfoPtr pScrn) /* Make sure the CP is on for the X server */ RADEONCP_START(pScrn, info); - info->dst_pitch_offset = info->frontPitchOffset; +#ifdef USE_XAA + if (!info->useEXA) + info->dst_pitch_offset = info->frontPitchOffset; +#endif } @@ -1276,7 +1284,7 @@ Bool RADEONDRIScreenInit(ScreenPtr pScreen) RADEON_VERSION_MAJOR_TILED : RADEON_VERSION_MAJOR; pDRIInfo->ddxDriverMinorVersion = RADEON_VERSION_MINOR; pDRIInfo->ddxDriverPatchVersion = RADEON_VERSION_PATCH; - pDRIInfo->frameBufferPhysicalAddress = info->LinearAddr; + pDRIInfo->frameBufferPhysicalAddress = (void *)info->LinearAddr; pDRIInfo->frameBufferSize = info->FbMapSize; pDRIInfo->frameBufferStride = (pScrn->displayWidth * info->CurrentLayout.pixel_bytes); @@ -1602,7 +1610,7 @@ Bool RADEONDRIFinishScreenInit(ScreenPtr pScreen) #endif /* Have shadowfb run only while there is 3d active. */ - if (info->allowPageFlip /* && info->drmMinor >= 3 */) { + if (!info->useEXA && info->allowPageFlip /* && info->drmMinor >= 3 */) { ShadowFBInit( pScreen, RADEONDRIRefreshArea ); } else { info->allowPageFlip = 0; @@ -1795,6 +1803,8 @@ static void RADEONDRIRefreshArea(ScrnInfoPtr pScrn, int num, BoxPtr pbox) if (!pSAREAPriv->pfAllowPageFlip && pSAREAPriv->pfCurrentPage == 0) return; +#ifdef USE_XAA + /* XXX: implement for EXA */ /* pretty much a hack. 
*/ if (info->tilingEnabled) info->dst_pitch_offset |= RADEON_DST_TILE_MACRO; @@ -1815,6 +1825,7 @@ static void RADEONDRIRefreshArea(ScrnInfoPtr pScrn, int num, BoxPtr pbox) } } info->dst_pitch_offset &= ~RADEON_DST_TILE_MACRO; +#endif /* USE_XAA */ } static void RADEONEnablePageFlip(ScreenPtr pScreen) @@ -1823,6 +1834,8 @@ static void RADEONEnablePageFlip(ScreenPtr pScreen) RADEONInfoPtr info = RADEONPTR(pScrn); RADEONSAREAPrivPtr pSAREAPriv = DRIGetSAREAPrivate(pScreen); +#ifdef USE_XAA + /* XXX: Fix in EXA case */ if (info->allowPageFlip) { /* pretty much a hack. */ if (info->tilingEnabled) @@ -1843,6 +1856,7 @@ static void RADEONEnablePageFlip(ScreenPtr pScreen) info->dst_pitch_offset &= ~RADEON_DST_TILE_MACRO; pSAREAPriv->pfAllowPageFlip = 1; } +#endif /* USE_XAA */ } static void RADEONDisablePageFlip(ScreenPtr pScreen) @@ -1874,6 +1888,21 @@ static void RADEONDRITransitionTo3d(ScreenPtr pScreen) FBAreaPtr fbarea; int width, height; + info->have3DWindows = 1; + + RADEONChangeSurfaces(pScrn); + RADEONEnablePageFlip(pScreen); + + if (info->cursor_offset != 0) + xf86ForceHWCursor (pScreen, TRUE); + +#ifdef USE_XAA + /* EXA allocates these areas up front, so it doesn't do the following + * stuff. + */ + if (info->useEXA) + return; + /* reserve offscreen area for back and depth buffers and textures */ /* If we still have an area for the back buffer reserved, free it @@ -1889,15 +1918,21 @@ static void RADEONDRITransitionTo3d(ScreenPtr pScreen) xf86QueryLargestOffscreenArea(pScreen, &width, &height, 0, 0, 0); - /* Free Xv linear offscreen memory if necessary */ + /* Free Xv linear offscreen memory if necessary + * FIXME: This is hideous. What about telling xv "oh btw you have no memory + * any more?" 
-- anholt + */ if (height < (info->depthTexLines + info->backLines)) { - xf86FreeOffscreenLinear(info->videoLinear); - info->videoLinear = NULL; + RADEONPortPrivPtr portPriv = info->adaptor->pPortPrivates[0].ptr; + xf86FreeOffscreenLinear((FBLinearPtr)portPriv->video_memory); + portPriv->video_memory = NULL; xf86QueryLargestOffscreenArea(pScreen, &width, &height, 0, 0, 0); } /* Reserve placeholder area so the other areas will match the * pre-calculated offsets + * FIXME: We may have other locked allocations and thus this would allocate + * in the wrong place. The XV surface allocations seem likely. -- anholt */ fbarea = xf86AllocateOffscreenArea(pScreen, pScrn->displayWidth, height @@ -1929,14 +1964,7 @@ static void RADEONDRITransitionTo3d(ScreenPtr pScreen) "experience screen corruption\n"); xf86FreeOffscreenArea(fbarea); - - info->have3DWindows = 1; - - RADEONChangeSurfaces(pScrn); - RADEONEnablePageFlip(pScreen); - - if (info->cursor_start) - xf86ForceHWCursor (pScreen, TRUE); +#endif /* USE_XAA */ } static void RADEONDRITransitionTo2d(ScreenPtr pScreen) @@ -1952,22 +1980,29 @@ static void RADEONDRITransitionTo2d(ScreenPtr pScreen) /* Shut down shadowing if we've made it back to the front page */ if (pSAREAPriv->pfCurrentPage == 0) { RADEONDisablePageFlip(pScreen); - xf86FreeOffscreenArea(info->backArea); - info->backArea = NULL; +#ifdef USE_XAA + if (!info->useEXA) { + xf86FreeOffscreenArea(info->backArea); + info->backArea = NULL; + } +#endif } else { xf86DrvMsg(pScreen->myNum, X_WARNING, "[dri] RADEONDRITransitionTo2d: " "kernel failed to unflip buffers.\n"); } - xf86FreeOffscreenArea(info->depthTexArea); +#ifdef USE_XAA + if (!info->useEXA) + xf86FreeOffscreenArea(info->depthTexArea); +#endif info->have3DWindows = 0; RADEONChangeSurfaces(pScrn); - if (info->cursor_start) - xf86ForceHWCursor (pScreen, FALSE); + if (info->cursor_offset != 0) + xf86ForceHWCursor (pScreen, FALSE); } void RADEONDRIAllocatePCIGARTTable(ScreenPtr pScreen) diff --git 
a/src/radeon_driver.c b/src/radeon_driver.c index b49b962d..c082a076 100644 --- a/src/radeon_driver.c +++ b/src/radeon_driver.c @@ -96,13 +96,12 @@ #include "xf86cmap.h" #include "vbe.h" - /* fbdevhw & vgaHW definitions */ + /* fbdevhw * vgaHW definitions */ #ifdef WITH_VGAHW #include "vgaHW.h" #endif #include "fbdevhw.h" - /* DPMS support. */ #define DPMS_SERVER #include <X11/extensions/dpms.h> @@ -193,7 +192,8 @@ typedef enum { OPTION_BIOS_HOTKEYS, OPTION_VGA_ACCESS, OPTION_REVERSE_DDC, - OPTION_LVDS_PROBE_PLL + OPTION_LVDS_PROBE_PLL, + OPTION_ACCELMETHOD } RADEONOpts; static const OptionInfoRec RADEONOptions[] = { @@ -255,6 +255,7 @@ static const OptionInfoRec RADEONOptions[] = { { OPTION_VGA_ACCESS, "VGAAccess", OPTV_BOOLEAN, {0}, TRUE }, { OPTION_REVERSE_DDC, "ReverseDDC", OPTV_BOOLEAN, {0}, FALSE }, { OPTION_LVDS_PROBE_PLL, "LVDSProbePLL", OPTV_BOOLEAN, {0}, FALSE }, + { OPTION_ACCELMETHOD, "AccelMethod", OPTV_STRING, {0}, FALSE }, { -1, NULL, OPTV_NONE, {0}, FALSE } }; @@ -315,12 +316,30 @@ static const char *fbSymbols[] = { NULL }; + +#ifdef USE_EXA +static const char *exaSymbols[] = { + "exaDriverInit", + "exaDriverFini", + "exaOffscreenAlloc", + "exaOffscreenFree", + "exaGetPixmapOffset", + "exaGetPixmapPitch", + "exaGetPixmapSize", + "exaMarkSync", + "exaWaitSync", + NULL +}; +#endif /* USE_EXA */ + +#ifdef USE_XAA static const char *xaaSymbols[] = { "XAACreateInfoRec", "XAADestroyInfoRec", "XAAInit", NULL }; +#endif /* USE_XAA */ #if 0 static const char *xf8_32bppSymbols[] = { @@ -434,7 +453,12 @@ void RADEONLoaderRefSymLists(void) vgahwSymbols, #endif fbSymbols, +#ifdef USE_EXA + exaSymbols, +#endif +#ifdef USE_XAA xaaSymbols, +#endif #if 0 xf8_32bppSymbols, #endif @@ -3963,7 +3987,7 @@ static Bool RADEONPreInitModes(ScrnInfoPtr pScrn, xf86Int10InfoPtr pInt10) info->allowColorTiling ? 
2048 : 64 * pScrn->bitsPerPixel, /* pitchInc */ 128, /* minHeight */ - info->MaxLines, /* maxHeight */ + info->MaxLines, /* maxHeight */ pScrn->display->virtualX, pScrn->display->virtualY, info->FbMapSize, @@ -4032,7 +4056,7 @@ static Bool RADEONPreInitModes(ScrnInfoPtr pScrn, xf86Int10InfoPtr pInt10) info->allowColorTiling ? 2048 : 64 * pScrn->bitsPerPixel, /* pitchInc */ 128, /* minHeight */ - info->MaxLines, /* maxHeight */ + info->MaxLines, /* maxHeight */ pScrn->display->virtualX, pScrn->display->virtualY, info->FbMapSize, @@ -4199,31 +4223,72 @@ static Bool RADEONPreInitAccel(ScrnInfoPtr pScrn) { #ifdef XFree86LOADER RADEONInfoPtr info = RADEONPTR(pScrn); + MessageType from; +#if defined(USE_EXA) && defined(USE_XAA) + char *optstr; +#endif if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) { int errmaj = 0, errmin = 0; - info->xaaReq.majorversion = 1; - info->xaaReq.minorversion = 2; + from = X_DEFAULT; +#if defined(USE_EXA) +#if defined(USE_XAA) + optstr = (char *)xf86GetOptValString(info->Options, OPTION_ACCELMETHOD); + if (optstr != NULL) { + if (xf86NameCmp(optstr, "EXA") == 0) { + from = X_CONFIG; + info->useEXA = TRUE; + } else if (xf86NameCmp(optstr, "XAA") == 0) { + from = X_CONFIG; + } + } +#else /* USE_XAA */ + info->useEXA = TRUE; +#endif /* !USE_XAA */ +#endif /* USE_EXA */ + xf86DrvMsg(pScrn->scrnIndex, from, + "Using %s acceleration architecture\n", + info->useEXA ? 
"EXA" : "XAA"); + +#ifdef USE_EXA + if (info->useEXA) { + info->exaReq.majorversion = 1; + info->exaReq.minorversion = 0; - if (!LoadSubModule(pScrn->module, "xaa", NULL, NULL, NULL, - &info->xaaReq, &errmaj, &errmin)) { - info->xaaReq.minorversion = 1; + if (!LoadSubModule(pScrn->module, "exa", NULL, NULL, NULL, + &info->exaReq, &errmaj, &errmin)) { + LoaderErrorMsg(NULL, "exa", errmaj, errmin); + return FALSE; + } + xf86LoaderReqSymLists(exaSymbols, NULL); + } +#endif /* USE_EXA */ +#ifdef USE_XAA + if (!info->useEXA) { + info->xaaReq.majorversion = 1; + info->xaaReq.minorversion = 2; if (!LoadSubModule(pScrn->module, "xaa", NULL, NULL, NULL, - &info->xaaReq, &errmaj, &errmin)) { - info->xaaReq.minorversion = 0; + &info->xaaReq, &errmaj, &errmin)) { + info->xaaReq.minorversion = 1; if (!LoadSubModule(pScrn->module, "xaa", NULL, NULL, NULL, &info->xaaReq, &errmaj, &errmin)) { - LoaderErrorMsg(NULL, "xaa", errmaj, errmin); - return FALSE; + info->xaaReq.minorversion = 0; + + if (!LoadSubModule(pScrn->module, "xaa", NULL, NULL, NULL, + &info->xaaReq, &errmaj, &errmin)) { + LoaderErrorMsg(NULL, "xaa", errmaj, errmin); + return FALSE; + } } } + xf86LoaderReqSymLists(xaaSymbols, NULL); } - xf86LoaderReqSymLists(xaaSymbols, NULL); +#endif /* USE_XAA */ } -#endif +#endif /* XFree86Loader */ return TRUE; } @@ -4376,6 +4441,11 @@ static Bool RADEONPreInitDRI(ScrnInfoPtr pScrn) info->allowPageFlip = xf86ReturnOptValBool(info->Options, OPTION_PAGE_FLIP, FALSE); + if (info->allowPageFlip && info->useEXA) { + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "Page flipping not allowed with EXA, disabling.\n"); + info->allowPageFlip = FALSE; + } } xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Page flipping %sabled\n", @@ -4876,7 +4946,8 @@ static void RADEONLoadPalette(ScrnInfoPtr pScrn, int numColors, if (info->CPStarted) DRILock(pScrn->pScreen, 0); #endif - if (info->accelOn && pScrn->pScreen) info->accel->Sync(pScrn); + if (info->accelOn && pScrn->pScreen) + RADEON_SYNC(info, pScrn); if 
(info->FBDev) { fbdevHWLoadPalette(pScrn, numColors, indices, colors, pVisual); @@ -4999,10 +5070,13 @@ static void RADEONBlockHandler(int i, pointer blockData, RADEONInfoPtr info = RADEONPTR(pScrn); #ifdef XF86DRI - if (info->directRenderingEnabled) + if (info->directRenderingEnabled) { FLUSH_RING(); + } +#endif +#ifdef USE_EXA + info->engineMode = EXA_ENGINEMODE_UNKNOWN; #endif - pScreen->BlockHandler = info->BlockHandler; (*pScreen->BlockHandler) (i, blockData, pTimeout, pReadmask); pScreen->BlockHandler = RADEONBlockHandler; @@ -5010,12 +5084,326 @@ static void RADEONBlockHandler(int i, pointer blockData, if (info->VideoTimerCallback) (*info->VideoTimerCallback)(pScrn, currentTime.milliseconds); -#ifdef RENDER +#if defined(RENDER) && defined(USE_XAA) if(info->RenderCallback) (*info->RenderCallback)(pScrn); #endif +} + + +#ifdef USE_XAA +#ifdef XF86DRI +Bool RADEONSetupMemXAA_DRI(int scrnIndex, ScreenPtr pScreen) +{ + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + int cpp = info->CurrentLayout.pixel_bytes; + int width_bytes = pScrn->displayWidth * cpp; + int bufferSize; + int depthSize; + int l; + int scanlines; + BoxRec MemBox; + FBAreaPtr fbarea; + + info->frontOffset = 0; + info->frontPitch = pScrn->displayWidth; + info->backPitch = pScrn->displayWidth; + + /* make sure we use 16 line alignment for tiling (8 might be enough). + * Might need that for non-XF86DRI too? + */ + if (info->allowColorTiling) { + bufferSize = (((pScrn->virtualY + 15) & ~15) * width_bytes + + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN; + } else { + bufferSize = (pScrn->virtualY * width_bytes + + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN; + } + + /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels, + * which is always the case if color tiling is used due to color pitch + * but not necessarily otherwise, and its height a multiple of 16 lines. 
+ */ + info->depthPitch = (pScrn->displayWidth + 31) & ~31; + depthSize = ((((pScrn->virtualY + 15) & ~15) * info->depthPitch + * cpp + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN); + + switch (info->CPMode) { + case RADEON_DEFAULT_CP_PIO_MODE: + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in PIO mode\n"); + break; + case RADEON_DEFAULT_CP_BM_MODE: + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in BM mode\n"); + break; + default: + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in UNKNOWN mode\n"); + break; + } + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "Using %d MB GART aperture\n", info->gartSize); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "Using %d MB for the ring buffer\n", info->ringSize); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "Using %d MB for vertex/indirect buffers\n", info->bufSize); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "Using %d MB for GART textures\n", info->gartTexSize); + + /* Try for front, back, depth, and three framebuffers worth of + * pixmap cache. Should be enough for a fullscreen background + * image plus some leftovers. + */ + info->textureSize = info->FbMapSize - 5 * bufferSize - depthSize; + + /* If that gives us less than half the available memory, let's + * be greedy and grab some more. Sorry, I care more about 3D + * performance than playing nicely, and you'll get around a full + * framebuffer's worth of pixmap cache anyway. + */ + if (info->textureSize < (int)info->FbMapSize / 2) { + info->textureSize = info->FbMapSize - 4 * bufferSize - depthSize; + } + if (info->textureSize < (int)info->FbMapSize / 2) { + info->textureSize = info->FbMapSize - 3 * bufferSize - depthSize; + } + + /* If there's still no space for textures, try without pixmap cache, but + * never use the reserved space, the space hw cursor and PCIGART table might + * use. 
+ */ + if (info->textureSize < 0) { + info->textureSize = info->FbMapSize - 2 * bufferSize - depthSize + - 2 * width_bytes - 16384 - RADEON_PCIGART_TABLE_SIZE; + } + + /* Check to see if there is more room available after the 8192nd + * scanline for textures + */ + /* FIXME: what's this good for? condition is pretty much impossible to meet */ + if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize + > info->textureSize) { + info->textureSize = + info->FbMapSize - 8192*width_bytes - bufferSize - depthSize; + } + + /* If backbuffer is disabled, don't allocate memory for it */ + if (info->noBackBuffer) { + info->textureSize += bufferSize; + } + + /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer! + At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned, + otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee + it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor + area otherwise). + This might cause some space at the end of the video memory to be unused, since it + can't be used (?) due to that log_tex_granularity thing??? + Could use different copyscreentoscreen function for the pageflip copies + (which would use different src and dst offsets) to avoid this. */ + if (info->allowColorTiling && !info->noBackBuffer) { + info->textureSize = info->FbMapSize - ((info->FbMapSize - info->textureSize + + width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16); + } + if (info->textureSize > 0) { + l = RADEONMinBits((info->textureSize-1) / RADEON_NR_TEX_REGIONS); + if (l < RADEON_LOG_TEX_GRANULARITY) + l = RADEON_LOG_TEX_GRANULARITY; + /* Round the texture size up to the nearest whole number of + * texture regions. Again, be greedy about this, don't + * round down. + */ + info->log2TexGran = l; + info->textureSize = (info->textureSize >> l) << l; + } else { + info->textureSize = 0; + } + + /* Set a minimum usable local texture heap size. 
This will fit + * two 256x256x32bpp textures. + */ + if (info->textureSize < 512 * 1024) { + info->textureOffset = 0; + info->textureSize = 0; + } + + if (info->allowColorTiling && !info->noBackBuffer) { + info->textureOffset = ((info->FbMapSize - info->textureSize) / + (width_bytes * 16)) * (width_bytes * 16); + } + else { + /* Reserve space for textures */ + info->textureOffset = ((info->FbMapSize - info->textureSize + + RADEON_BUFFER_ALIGN) & + ~(CARD32)RADEON_BUFFER_ALIGN); + } + + /* Reserve space for the shared depth + * buffer. + */ + info->depthOffset = ((info->textureOffset - depthSize + + RADEON_BUFFER_ALIGN) & + ~(CARD32)RADEON_BUFFER_ALIGN); + + /* Reserve space for the shared back buffer */ + if (info->noBackBuffer) { + info->backOffset = info->depthOffset; + } else { + info->backOffset = ((info->depthOffset - bufferSize + + RADEON_BUFFER_ALIGN) & + ~(CARD32)RADEON_BUFFER_ALIGN); + } + + info->backY = info->backOffset / width_bytes; + info->backX = (info->backOffset - (info->backY * width_bytes)) / cpp; + + scanlines = info->FbMapSize / width_bytes; + if (scanlines > 8191) + scanlines = 8191; + + MemBox.x1 = 0; + MemBox.y1 = 0; + MemBox.x2 = pScrn->displayWidth; + MemBox.y2 = scanlines; + + if (!xf86InitFBManager(pScreen, &MemBox)) { + xf86DrvMsg(scrnIndex, X_ERROR, + "Memory manager initialization to " + "(%d,%d) (%d,%d) failed\n", + MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); + return FALSE; + } else { + int width, height; + + xf86DrvMsg(scrnIndex, X_INFO, + "Memory manager initialized to (%d,%d) (%d,%d)\n", + MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); + /* why oh why can't we just request modes which are guaranteed to be 16 lines + aligned... sigh */ + if ((fbarea = xf86AllocateOffscreenArea(pScreen, + pScrn->displayWidth, + info->allowColorTiling ? 
+ ((pScrn->virtualY + 15) & ~15) + - pScrn->virtualY + 2 : 2, + 0, NULL, NULL, + NULL))) { + xf86DrvMsg(scrnIndex, X_INFO, + "Reserved area from (%d,%d) to (%d,%d)\n", + fbarea->box.x1, fbarea->box.y1, + fbarea->box.x2, fbarea->box.y2); + } else { + xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n"); + } + + RADEONDRIAllocatePCIGARTTable(pScreen); + + if (xf86QueryLargestOffscreenArea(pScreen, &width, + &height, 0, 0, 0)) { + xf86DrvMsg(scrnIndex, X_INFO, + "Largest offscreen area available: %d x %d\n", + width, height); + + /* Lines in offscreen area needed for depth buffer and + * textures + */ + info->depthTexLines = (scanlines + - info->depthOffset / width_bytes); + info->backLines = (scanlines + - info->backOffset / width_bytes + - info->depthTexLines); + info->backArea = NULL; + } else { + xf86DrvMsg(scrnIndex, X_ERROR, + "Unable to determine largest offscreen area " + "available\n"); + return FALSE; + } + } + + xf86DrvMsg(scrnIndex, X_INFO, + "Will use back buffer at offset 0x%x\n", + info->backOffset); + xf86DrvMsg(scrnIndex, X_INFO, + "Will use depth buffer at offset 0x%x\n", + info->depthOffset); + xf86DrvMsg(scrnIndex, X_INFO, + "Will use %d kb for PCI GART table at offset 0x%x\n", + info->pciGartSize/1024, info->pciGartOffset); + xf86DrvMsg(scrnIndex, X_INFO, + "Will use %d kb for textures at offset 0x%x\n", + info->textureSize/1024, info->textureOffset); + + info->frontPitchOffset = (((info->frontPitch * cpp / 64) << 22) | + ((info->frontOffset + info->fbLocation) >> 10)); + + info->backPitchOffset = (((info->backPitch * cpp / 64) << 22) | + ((info->backOffset + info->fbLocation) >> 10)); + + info->depthPitchOffset = (((info->depthPitch * cpp / 64) << 22) | + ((info->depthOffset + info->fbLocation) >> 10)); +} +#endif /* XF86DRI */ + +Bool RADEONSetupMemXAA(int scrnIndex, ScreenPtr pScreen) +{ + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + BoxRec MemBox; + int y2; + + int width_bytes = 
pScrn->displayWidth * info->CurrentLayout.pixel_bytes; + MemBox.x1 = 0; + MemBox.y1 = 0; + MemBox.x2 = pScrn->displayWidth; + y2 = info->FbMapSize / width_bytes; + if (y2 >= 32768) + y2 = 32767; /* because MemBox.y2 is signed short */ + MemBox.y2 = y2; + + /* The acceleration engine uses 14 bit + * signed coordinates, so we can't have any + * drawable caches beyond this region. + */ + if (MemBox.y2 > 8191) + MemBox.y2 = 8191; + + if (!xf86InitFBManager(pScreen, &MemBox)) { + xf86DrvMsg(scrnIndex, X_ERROR, + "Memory manager initialization to " + "(%d,%d) (%d,%d) failed\n", + MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); + return FALSE; + } else { + int width, height; + FBAreaPtr fbarea; + + xf86DrvMsg(scrnIndex, X_INFO, + "Memory manager initialized to (%d,%d) (%d,%d)\n", + MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); + if ((fbarea = xf86AllocateOffscreenArea(pScreen, + pScrn->displayWidth, + info->allowColorTiling ? + ((pScrn->virtualY + 15) & ~15) + - pScrn->virtualY + 2 : 2, + 0, NULL, NULL, + NULL))) { + xf86DrvMsg(scrnIndex, X_INFO, + "Reserved area from (%d,%d) to (%d,%d)\n", + fbarea->box.x1, fbarea->box.y1, + fbarea->box.x2, fbarea->box.y2); + } else { + xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n"); + } + if (xf86QueryLargestOffscreenArea(pScreen, &width, &height, + 0, 0, 0)) { + xf86DrvMsg(scrnIndex, X_INFO, + "Largest offscreen area available: %d x %d\n", + width, height); + } + } } +#endif /* USE_XAA */ /* Called at the start of each server generation. 
*/ _X_EXPORT Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, @@ -5023,8 +5411,7 @@ _X_EXPORT Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, { ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; RADEONInfoPtr info = RADEONPTR(pScrn); - BoxRec MemBox; - int y2; + int hasDRI = 0; #ifdef RENDER int subPixelOrder = SubPixelUnknown; char* s; @@ -5061,6 +5448,13 @@ _X_EXPORT Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, } } +#if 0 + if (info->allowColorTiling && info->useEXA) { + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "Color tiling not supported yet with EXA, disabling\n"); + info->allowColorTiling = FALSE; + } +#endif if (info->allowColorTiling && (pScrn->virtualX > info->MaxSurfaceWidth)) { xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Color tiling not supported with virtual x resolutions larger than %d, disabling\n", @@ -5179,6 +5573,8 @@ _X_EXPORT Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Depth moves disabled by default\n"); } + + hasDRI = info->directRenderingEnabled; #endif RADEONSetFBLocation(pScrn); @@ -5229,300 +5625,25 @@ _X_EXPORT Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, } #endif /* Memory manager setup */ -#ifdef XF86DRI - if (info->directRenderingEnabled) { - FBAreaPtr fbarea; - int cpp = info->CurrentLayout.pixel_bytes; - int width_bytes = pScrn->displayWidth * cpp; - int bufferSize; - int depthSize; - int l; - int scanlines; - - info->frontOffset = 0; - info->frontPitch = pScrn->displayWidth; - info->backPitch = pScrn->displayWidth; - /* make sure we use 16 line alignment for tiling (8 might be enough). - Might need that for non-XF86DRI too? 
*/ - if (info->allowColorTiling) { - bufferSize = (((pScrn->virtualY + 15) & ~15) * width_bytes - + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN; - } - else { - bufferSize = (pScrn->virtualY * width_bytes - + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN; - } - /* Due to tiling, the Z buffer pitch must be a multiple of 32 pixels, - * which is always the case if color tiling is used due to color pitch - * but not necessarily otherwise, and its height a multiple of 16 lines. - */ - info->depthPitch = (pScrn->displayWidth + 31) & ~31; - depthSize = ((((pScrn->virtualY + 15) & ~15) * info->depthPitch - * cpp + RADEON_BUFFER_ALIGN) & ~RADEON_BUFFER_ALIGN); - - switch (info->CPMode) { - case RADEON_DEFAULT_CP_PIO_MODE: - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in PIO mode\n"); - break; - case RADEON_DEFAULT_CP_BM_MODE: - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in BM mode\n"); - break; - default: - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "CP in UNKNOWN mode\n"); - break; - } - - xf86DrvMsg(pScrn->scrnIndex, X_INFO, - "Using %d MB GART aperture\n", info->gartSize); - xf86DrvMsg(pScrn->scrnIndex, X_INFO, - "Using %d MB for the ring buffer\n", info->ringSize); - xf86DrvMsg(pScrn->scrnIndex, X_INFO, - "Using %d MB for vertex/indirect buffers\n", info->bufSize); - xf86DrvMsg(pScrn->scrnIndex, X_INFO, - "Using %d MB for GART textures\n", info->gartTexSize); - - /* Try for front, back, depth, and three framebuffers worth of - * pixmap cache. Should be enough for a fullscreen background - * image plus some leftovers. - */ - info->textureSize = info->FbMapSize - 5 * bufferSize - depthSize; - - /* If that gives us less than half the available memory, let's - * be greedy and grab some more. Sorry, I care more about 3D - * performance than playing nicely, and you'll get around a full - * framebuffer's worth of pixmap cache anyway. 
- */ - if (info->textureSize < (int)info->FbMapSize / 2) { - info->textureSize = info->FbMapSize - 4 * bufferSize - depthSize; - } - if (info->textureSize < (int)info->FbMapSize / 2) { - info->textureSize = info->FbMapSize - 3 * bufferSize - depthSize; - } - /* If there's still no space for textures, try without pixmap cache, but never use - the reserved space, the space hw cursor and PCIGART table might use */ - if (info->textureSize < 0) { - info->textureSize = info->FbMapSize - 2 * bufferSize - depthSize - - 2 * width_bytes - 16384 - RADEON_PCIGART_TABLE_SIZE; - } - - /* Check to see if there is more room available after the 8192nd - scanline for textures */ - /* FIXME: what's this good for? condition is pretty much impossible to meet */ - if ((int)info->FbMapSize - 8192*width_bytes - bufferSize - depthSize - > info->textureSize) { - info->textureSize = - info->FbMapSize - 8192*width_bytes - bufferSize - depthSize; - } - - /* If backbuffer is disabled, don't allocate memory for it */ - if (info->noBackBuffer) { - info->textureSize += bufferSize; - } - - /* RADEON_BUFFER_ALIGN is not sufficient for backbuffer! - At least for pageflip + color tiling, need to make sure it's 16 scanlines aligned, - otherwise the copy-from-front-to-back will fail (width_bytes * 16 will also guarantee - it's still 4kb aligned for tiled case). Need to round up offset (might get into cursor - area otherwise). - This might cause some space at the end of the video memory to be unused, since it - can't be used (?) due to that log_tex_granularity thing??? - Could use different copyscreentoscreen function for the pageflip copies - (which would use different src and dst offsets) to avoid this. 
*/ - if (info->allowColorTiling && !info->noBackBuffer) { - info->textureSize = info->FbMapSize - ((info->FbMapSize - info->textureSize + - width_bytes * 16 - 1) / (width_bytes * 16)) * (width_bytes * 16); - } - if (info->textureSize > 0) { - l = RADEONMinBits((info->textureSize-1) / RADEON_NR_TEX_REGIONS); - if (l < RADEON_LOG_TEX_GRANULARITY) l = RADEON_LOG_TEX_GRANULARITY; - - /* Round the texture size up to the nearest whole number of - * texture regions. Again, be greedy about this, don't - * round down. - */ - info->log2TexGran = l; - info->textureSize = (info->textureSize >> l) << l; - } else { - info->textureSize = 0; - } - - /* Set a minimum usable local texture heap size. This will fit - * two 256x256x32bpp textures. - */ - if (info->textureSize < 512 * 1024) { - info->textureOffset = 0; - info->textureSize = 0; - } - - if (info->allowColorTiling && !info->noBackBuffer) { - info->textureOffset = ((info->FbMapSize - info->textureSize) / - (width_bytes * 16)) * (width_bytes * 16); - } - else { - /* Reserve space for textures */ - info->textureOffset = ((info->FbMapSize - info->textureSize + - RADEON_BUFFER_ALIGN) & - ~(CARD32)RADEON_BUFFER_ALIGN); - } - - /* Reserve space for the shared depth - * buffer. 
- */ - info->depthOffset = ((info->textureOffset - depthSize + - RADEON_BUFFER_ALIGN) & - ~(CARD32)RADEON_BUFFER_ALIGN); - - /* Reserve space for the shared back buffer */ - if (info->noBackBuffer) { - info->backOffset = info->depthOffset; - } else { - info->backOffset = ((info->depthOffset - bufferSize + - RADEON_BUFFER_ALIGN) & - ~(CARD32)RADEON_BUFFER_ALIGN); - } - - info->backY = info->backOffset / width_bytes; - info->backX = (info->backOffset - (info->backY * width_bytes)) / cpp; - - scanlines = info->FbMapSize / width_bytes; - if (scanlines > 8191) scanlines = 8191; - - MemBox.x1 = 0; - MemBox.y1 = 0; - MemBox.x2 = pScrn->displayWidth; - MemBox.y2 = scanlines; - - if (!xf86InitFBManager(pScreen, &MemBox)) { - xf86DrvMsg(scrnIndex, X_ERROR, - "Memory manager initialization to " - "(%d,%d) (%d,%d) failed\n", - MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); - return FALSE; - } else { - int width, height; - - xf86DrvMsg(scrnIndex, X_INFO, - "Memory manager initialized to (%d,%d) (%d,%d)\n", - MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); - /* why oh why can't we just request modes which are guaranteed to be 16 lines - aligned... sigh */ - if ((fbarea = xf86AllocateOffscreenArea(pScreen, - pScrn->displayWidth, - info->allowColorTiling ? 
- ((pScrn->virtualY + 15) & ~15) - - pScrn->virtualY + 2 : 2, - 0, NULL, NULL, - NULL))) { - xf86DrvMsg(scrnIndex, X_INFO, - "Reserved area from (%d,%d) to (%d,%d)\n", - fbarea->box.x1, fbarea->box.y1, - fbarea->box.x2, fbarea->box.y2); - } else { - xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n"); - } - - RADEONDRIAllocatePCIGARTTable(pScreen); - - if (xf86QueryLargestOffscreenArea(pScreen, &width, - &height, 0, 0, 0)) { - xf86DrvMsg(scrnIndex, X_INFO, - "Largest offscreen area available: %d x %d\n", - width, height); - - /* Lines in offscreen area needed for depth buffer and - * textures - */ - info->depthTexLines = (scanlines - - info->depthOffset / width_bytes); - info->backLines = (scanlines - - info->backOffset / width_bytes - - info->depthTexLines); - info->backArea = NULL; - } else { - xf86DrvMsg(scrnIndex, X_ERROR, - "Unable to determine largest offscreen area " - "available\n"); - return FALSE; - } - } - - xf86DrvMsg(scrnIndex, X_INFO, - "Will use back buffer at offset 0x%x\n", - info->backOffset); - xf86DrvMsg(scrnIndex, X_INFO, - "Will use depth buffer at offset 0x%x\n", - info->depthOffset); - xf86DrvMsg(scrnIndex, X_INFO, - "Will use %d kb for PCI GART table at offset 0x%x\n", - info->pciGartSize/1024, info->pciGartOffset); - xf86DrvMsg(scrnIndex, X_INFO, - "Will use %d kb for textures at offset 0x%x\n", - info->textureSize/1024, info->textureOffset); - - info->frontPitchOffset = (((info->frontPitch * cpp / 64) << 22) | - ((info->frontOffset + info->fbLocation) >> 10)); - - info->backPitchOffset = (((info->backPitch * cpp / 64) << 22) | - ((info->backOffset + info->fbLocation) >> 10)); +#ifdef USE_EXA + if (info->useEXA && !RADEONSetupMemEXA(pScreen)) + return FALSE; +#endif - info->depthPitchOffset = (((info->depthPitch * cpp / 64) << 22) | - ((info->depthOffset + info->fbLocation) >> 10)); - } else +#if defined(XF86DRI) && defined(USE_XAA) + if (!info->useEXA && hasDRI && !RADEONSetupMemXAA_DRI(scrnIndex, pScreen)) + return FALSE; #endif - 
{ - int width_bytes = pScrn->displayWidth * info->CurrentLayout.pixel_bytes; - MemBox.x1 = 0; - MemBox.y1 = 0; - MemBox.x2 = pScrn->displayWidth; - y2 = info->FbMapSize / width_bytes; - if (y2 >= 32768) y2 = 32767; /* because MemBox.y2 is signed short */ - MemBox.y2 = y2; - - /* The acceleration engine uses 14 bit - signed coordinates, so we can't have any - drawable caches beyond this region. */ - if (MemBox.y2 > 8191) MemBox.y2 = 8191; - - if (!xf86InitFBManager(pScreen, &MemBox)) { - xf86DrvMsg(scrnIndex, X_ERROR, - "Memory manager initialization to " - "(%d,%d) (%d,%d) failed\n", - MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); - return FALSE; - } else { - int width, height; - FBAreaPtr fbarea; - xf86DrvMsg(scrnIndex, X_INFO, - "Memory manager initialized to (%d,%d) (%d,%d)\n", - MemBox.x1, MemBox.y1, MemBox.x2, MemBox.y2); - if ((fbarea = xf86AllocateOffscreenArea(pScreen, - pScrn->displayWidth, - info->allowColorTiling ? - ((pScrn->virtualY + 15) & ~15) - - pScrn->virtualY + 2 : 2, - 0, NULL, NULL, - NULL))) { - xf86DrvMsg(scrnIndex, X_INFO, - "Reserved area from (%d,%d) to (%d,%d)\n", - fbarea->box.x1, fbarea->box.y1, - fbarea->box.x2, fbarea->box.y2); - } else { - xf86DrvMsg(scrnIndex, X_ERROR, "Unable to reserve area\n"); - } - if (xf86QueryLargestOffscreenArea(pScreen, &width, &height, - 0, 0, 0)) { - xf86DrvMsg(scrnIndex, X_INFO, - "Largest offscreen area available: %d x %d\n", - width, height); - } - } - } +#ifdef USE_XAA + if (!info->useEXA && !hasDRI && !RADEONSetupMemXAA(scrnIndex, pScreen)) + return FALSE; +#endif - info->dst_pitch_offset = (((pScrn->displayWidth * info->CurrentLayout.pixel_bytes / 64) << 22) | - ((info->fbLocation + pScrn->fbOffset) >> 10)); + info->dst_pitch_offset = (((pScrn->displayWidth * info->CurrentLayout.pixel_bytes / 64) + << 22) | ((info->fbLocation + pScrn->fbOffset) >> 10)); - /* Acceleration setup */ + /* Acceleration setup */ if (!xf86ReturnOptValBool(info->Options, OPTION_NOACCEL, FALSE)) { if 
(RADEONAccelInit(pScreen)) { xf86DrvMsg(scrnIndex, X_INFO, "Acceleration enabled\n"); @@ -5538,41 +5659,53 @@ _X_EXPORT Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, info->accelOn = FALSE; } - /* DGA setup */ - RADEONDGAInit(pScreen); + /* DGA setup XXX FIXME EXA */ + if (!info->useEXA) + RADEONDGAInit(pScreen); - /* Backing store setup */ + /* Backing store setup */ miInitializeBackingStore(pScreen); xf86SetBackingStore(pScreen); - /* Set Silken Mouse */ + /* Set Silken Mouse */ xf86SetSilkenMouse(pScreen); - /* Cursor setup */ + /* Cursor setup */ miDCInitialize(pScreen, xf86GetPointerScreenFuncs()); - /* Hardware cursor setup */ + /* Hardware cursor setup */ if (!xf86ReturnOptValBool(info->Options, OPTION_SW_CURSOR, FALSE)) { if (RADEONCursorInit(pScreen)) { - int width, height; +#ifdef USE_EXA + if (info->useEXA) { + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "Using hardware cursor (offset %lu)\n", /* cursor_offset is unsigned long */ + info->cursor_offset); + } +#endif /* USE_EXA */ +#ifdef USE_XAA + if (!info->useEXA) { + int width, height; - xf86DrvMsg(pScrn->scrnIndex, X_INFO, - "Using hardware cursor (scanline %ld)\n", - info->cursor_start / pScrn->displayWidth - / info->CurrentLayout.pixel_bytes); - if (xf86QueryLargestOffscreenArea(pScreen, &width, &height, + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "Using hardware cursor (scanline %ld)\n", + info->cursor_offset / pScrn->displayWidth + / info->CurrentLayout.pixel_bytes); + if (xf86QueryLargestOffscreenArea(pScreen, &width, &height, 0, 0, 0)) { - xf86DrvMsg(scrnIndex, X_INFO, - "Largest offscreen area available: %d x %d\n", - width, height); + xf86DrvMsg(scrnIndex, X_INFO, + "Largest offscreen area available: %d x %d\n", + width, height); + } } +#endif /* USE_XAA */ } else { xf86DrvMsg(scrnIndex, X_ERROR, "Hardware cursor initialization failed\n"); xf86DrvMsg(scrnIndex, X_INFO, "Using software cursor\n"); } } else { - info->cursor_start = 0; + info->cursor_offset = 0; xf86DrvMsg(scrnIndex, X_INFO, "Using software cursor\n"); } @@ -5634,7 
+5767,15 @@ _X_EXPORT Bool RADEONScreenInit(int scrnIndex, ScreenPtr pScreen, done their thing, complete the DRI setup. */ if (!(info->directRenderingEnabled = RADEONDRIFinishScreenInit(pScreen))) { - RADEONAccelInitMMIO(pScreen, info->accel); +#ifdef USE_EXA + if (info->useEXA) { + RADEONDrawInitMMIO(pScreen); + } +#endif /* USE_EXA */ +#ifdef USE_XAA + if (!info->useEXA) + RADEONAccelInitMMIO(pScreen, info->accel); +#endif /* USE_XAA */ } } if (info->directRenderingEnabled) { @@ -7992,7 +8133,8 @@ _X_EXPORT Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) #endif } - if (info->accelOn) info->accel->Sync(pScrn); + if (info->accelOn) + RADEON_SYNC(info, pScrn); if (info->FBDev) { RADEONSaveFBDevRegisters(pScrn, &info->ModeReg); @@ -8015,7 +8157,7 @@ _X_EXPORT Bool RADEONSwitchMode(int scrnIndex, DisplayModePtr mode, int flags) } if (info->accelOn) { - info->accel->Sync(pScrn); + RADEON_SYNC(info, pScrn); RADEONEngineRestore(pScrn); } @@ -8185,7 +8327,8 @@ _X_EXPORT void RADEONAdjustFrame(int scrnIndex, int x, int y, int flags) if (info->CPStarted) DRILock(pScrn->pScreen, 0); #endif - if (info->accelOn) info->accel->Sync(pScrn); + if (info->accelOn) + RADEON_SYNC(info, pScrn); if(info->MergedFB) { RADEONAdjustFrameMerged(scrnIndex, x, y, flags); @@ -8312,10 +8455,12 @@ static Bool RADEONCloseScreen(int scrnIndex, ScreenPtr pScreen) } #endif - if(info->RenderTex) { +#ifdef USE_XAA + if(!info->useEXA && info->RenderTex) { xf86FreeOffscreenLinear(info->RenderTex); info->RenderTex = NULL; } +#endif /* USE_XAA */ if (pScrn->vtSema) { RADEONDisplayPowerManagementSet(pScrn, DPMSModeOn, 0); @@ -8323,11 +8468,21 @@ static Bool RADEONCloseScreen(int scrnIndex, ScreenPtr pScreen) } RADEONUnmapMem(pScrn); - if (info->accel) XAADestroyInfoRec(info->accel); - info->accel = NULL; +#ifdef USE_EXA + if (info->useEXA && info->accelOn) + exaDriverFini(pScreen); +#endif /* USE_EXA */ +#ifdef USE_XAA + if (!info->useEXA) { + if (info->accel) + 
XAADestroyInfoRec(info->accel); + info->accel = NULL; - if (info->scratch_save) xfree(info->scratch_save); - info->scratch_save = NULL; + if (info->scratch_save) + xfree(info->scratch_save); + info->scratch_save = NULL; + } +#endif /* USE_XAA */ if (info->cursor) xf86DestroyCursorInfoRec(info->cursor); info->cursor = NULL; @@ -8476,7 +8631,8 @@ static void RADEONDisplayPowerManagementSet(ScrnInfoPtr pScrn, if (info->CPStarted) DRILock(pScrn->pScreen, 0); #endif - if (info->accelOn) info->accel->Sync(pScrn); + if (info->accelOn) + RADEON_SYNC(info, pScrn); if (info->FBDev) { fbdevHWDPMSSet(pScrn, PowerManagementMode, flags); diff --git a/src/radeon_exa.c b/src/radeon_exa.c new file mode 100644 index 00000000..632faaf0 --- /dev/null +++ b/src/radeon_exa.c @@ -0,0 +1,463 @@ +/* + * Copyright 2005 Eric Anholt + * Copyright 2005 Benjamin Herrenschmidt + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt <anholt@FreeBSD.org> + * Zack Rusin <zrusin@trolltech.com> + * Benjamin Herrenschmidt <benh@kernel.crashing.org> + * + */ + +#include "radeon.h" +#include "radeon_reg.h" +#include "radeon_dri.h" +#include "radeon_macros.h" +#include "radeon_probe.h" +#include "radeon_version.h" +#include "radeon_sarea.h" + +#include "xf86.h" + + +/***********************************************************************/ +#define RINFO_FROM_SCREEN(pScr) ScrnInfoPtr pScrn = xf86Screens[pScr->myNum]; \ + RADEONInfoPtr info = RADEONPTR(pScrn) + +#define RADEON_TRACE_FALL 0 +#define RADEON_TRACE_DRAW 0 + +#if RADEON_TRACE_FALL +#define RADEON_FALLBACK(x) \ +do { \ + ErrorF("%s: ", __FUNCTION__); \ + ErrorF x; \ + return FALSE; \ +} while (0) +#else +#define RADEON_FALLBACK(x) return FALSE +#endif + +#if RADEON_TRACE_DRAW +#define TRACE do { ErrorF("TRACE: %s\n", __FUNCTION__); } while(0) +#else +#define TRACE +#endif + +static struct { + int rop; + int pattern; +} RADEON_ROP[] = { + { RADEON_ROP3_ZERO, RADEON_ROP3_ZERO }, /* GXclear */ + { RADEON_ROP3_DSa, RADEON_ROP3_DPa }, /* Gxand */ + { RADEON_ROP3_SDna, RADEON_ROP3_PDna }, /* GXandReverse */ + { RADEON_ROP3_S, RADEON_ROP3_P }, /* GXcopy */ + { RADEON_ROP3_DSna, RADEON_ROP3_DPna }, /* GXandInverted */ + { RADEON_ROP3_D, RADEON_ROP3_D }, /* GXnoop */ + { RADEON_ROP3_DSx, RADEON_ROP3_DPx }, /* GXxor */ + { RADEON_ROP3_DSo, RADEON_ROP3_DPo }, /* GXor */ + { RADEON_ROP3_DSon, RADEON_ROP3_DPon }, /* GXnor */ + { RADEON_ROP3_DSxn, RADEON_ROP3_PDxn }, /* GXequiv */ + { RADEON_ROP3_Dn, RADEON_ROP3_Dn }, /* GXinvert */ + { RADEON_ROP3_SDno, RADEON_ROP3_PDno }, /* GXorReverse */ + { RADEON_ROP3_Sn, RADEON_ROP3_Pn }, /* GXcopyInverted */ + { 
RADEON_ROP3_DSno, RADEON_ROP3_DPno }, /* GXorInverted */ + { RADEON_ROP3_DSan, RADEON_ROP3_DPan }, /* GXnand */ + { RADEON_ROP3_ONE, RADEON_ROP3_ONE } /* GXset */ +}; + +/* Compute floor(log2(val)); returns -1 when val <= 0. */ +static __inline__ int +RADEONLog2(int val) +{ + int bits; + unsigned int rest = val > 0 ? (unsigned int)val : 0; /* unsigned, so the shift loop always terminates */ + for (bits = 0; rest != 0; rest >>= 1, ++bits) + ; + return bits - 1; +} +/* Return the bit pattern of a float as a CARD32 (union type-pun, no numeric conversion). */ +static __inline__ CARD32 F_TO_DW(float val) +{ + union { + float f; + CARD32 l; + } tmp; + tmp.f = val; + return tmp.l; +} + +/* Assumes that depth 15 and 16 can be used as depth 16, which is okay since we + * require src and dest datatypes to be equal. + */ +static Bool RADEONGetDatatypeBpp(int bpp, CARD32 *type) +{ + switch (bpp) { + case 8: + *type = ATI_DATATYPE_CI8; + return TRUE; + case 16: + *type = ATI_DATATYPE_RGB565; + return TRUE; + case 24: + *type = ATI_DATATYPE_CI8; + return TRUE; + case 32: + *type = ATI_DATATYPE_ARGB8888; + return TRUE; + default: + RADEON_FALLBACK(("Unsupported bpp: %d\n", bpp)); + return FALSE; + } +} +/* TRUE when tiling is enabled and the pixmap lives at offset 0, i.e. the front buffer. */ +static Bool RADEONPixmapIsColortiled(PixmapPtr pPix) +{ + RINFO_FROM_SCREEN(pPix->drawable.pScreen); + + /* This doesn't account for the back buffer, which we may want to wrap in + * a pixmap at some point for the purposes of DRI buffer moves. + */ + if (info->tilingEnabled && exaGetPixmapOffset(pPix) == 0) + return TRUE; + else + return FALSE; +} +/* Pack pitch and offset into *pitch_offset as ((pitch >> 6) << 22) | (offset >> 10), plus the macro-tile flag for a tiled pixmap; returns FALSE (fallback) when pitch or offset is not a multiple of the EXA alignment. The bpp argument is unused. */ +static Bool RADEONGetOffsetPitch(PixmapPtr pPix, int bpp, CARD32 *pitch_offset, + unsigned int offset, unsigned int pitch) +{ + RINFO_FROM_SCREEN(pPix->drawable.pScreen); + + if (pitch % info->exa.card.pixmapPitchAlign != 0) + RADEON_FALLBACK(("Bad pitch 0x%08x\n", pitch)); + + if (offset % info->exa.card.pixmapOffsetAlign != 0) + RADEON_FALLBACK(("Bad offset 0x%08x\n", offset)); + + pitch = pitch >> 6; + *pitch_offset = (pitch << 22) | (offset >> 10); + + /* If it's the front buffer, we've got to note that it's tiled? 
*/ + if (RADEONPixmapIsColortiled(pPix)) + *pitch_offset |= RADEON_DST_TILE_MACRO; + return TRUE; +} + +static Bool RADEONGetPixmapOffsetPitch(PixmapPtr pPix, CARD32 *pitch_offset) +{ + RINFO_FROM_SCREEN(pPix->drawable.pScreen); + CARD32 pitch, offset; + int bpp; + + bpp = pPix->drawable.bitsPerPixel; + if (bpp == 24) + bpp = 8; + + offset = exaGetPixmapOffset(pPix) + info->fbLocation; + pitch = exaGetPixmapPitch(pPix); + + return RADEONGetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch); +} + +#if X_BYTE_ORDER == X_BIG_ENDIAN + +static unsigned long swapper_surfaces[3]; + +static Bool RADEONPrepareAccess(PixmapPtr pPix, int index) +{ + RINFO_FROM_SCREEN(pPix->drawable.pScreen); + unsigned char *RADEONMMIO = info->MMIO; + CARD32 offset = exaGetPixmapOffset(pPix); + int bpp, rc, soff; + CARD32 size, flags; + + /* Front buffer is always set with proper swappers */ + if (offset == 0) + return TRUE; + + /* If same bpp as front buffer, just do nothing as the main + * swappers will apply + */ + bpp = pPix->drawable.bitsPerPixel; + if (bpp == pScrn->bitsPerPixel) + return TRUE; + + /* We need to setup a separate swapper, let's request a + * surface. 
We need to align the size first + */ + size = exaGetPixmapSize(pPix); + size = (size + RADEON_BUFFER_ALIGN) & ~(RADEON_BUFFER_ALIGN); + + /* Set surface to tiling disabled with appropriate swapper */ + switch (bpp) { + case 16: + flags = RADEON_SURF_AP0_SWP_16BPP | RADEON_SURF_AP1_SWP_16BPP; + break; + case 32: + flags = RADEON_SURF_AP0_SWP_32BPP | RADEON_SURF_AP1_SWP_32BPP; + break; + default: + flags = 0; + } +#if defined(XF86DRI) + if (info->directRenderingEnabled && info->allowColorTiling) { + drmRadeonSurfaceAlloc drmsurfalloc; + + drmsurfalloc.address = offset; + drmsurfalloc.size = size; + drmsurfalloc.flags = flags | 1; /* bogus pitch to please DRM */ + + rc = drmCommandWrite(info->drmFD, DRM_RADEON_SURF_ALLOC, + &drmsurfalloc, sizeof(drmsurfalloc)); + if (rc < 0) { + xf86DrvMsg(pScrn->scrnIndex, X_ERROR, + "drm: could not allocate surface for access" + " swapper, err: %d!\n", rc); + return FALSE; + } + swapper_surfaces[index] = offset; + + return TRUE; + } +#endif + soff = (index + 1) * 0x10; + OUTREG(RADEON_SURFACE0_INFO + soff, flags); + OUTREG(RADEON_SURFACE0_LOWER_BOUND + soff, offset); + OUTREG(RADEON_SURFACE0_UPPER_BOUND + soff, offset + size - 1); + swapper_surfaces[index] = offset; + return TRUE; +} + +static void RADEONFinishAccess(PixmapPtr pPix, int index) +{ + RINFO_FROM_SCREEN(pPix->drawable.pScreen); + unsigned char *RADEONMMIO = info->MMIO; + CARD32 offset = exaGetPixmapOffset(pPix); + int bpp, soff; + + /* Front buffer is always set with proper swappers */ + if (offset == 0) + return; + + if (swapper_surfaces[index] == 0) + return; +#if defined(XF86DRI) + if (info->directRenderingEnabled && info->allowColorTiling) { + drmRadeonSurfaceFree drmsurffree; + + drmsurffree.address = offset; + drmCommandWrite(info->drmFD, DRM_RADEON_SURF_FREE, + &drmsurffree, sizeof(drmsurffree)); + swapper_surfaces[index] = 0; + return; + } +#endif + soff = (index + 1) * 0x10; + OUTREG(RADEON_SURFACE0_INFO + soff, 0); + OUTREG(RADEON_SURFACE0_LOWER_BOUND + soff, 
0); + OUTREG(RADEON_SURFACE0_UPPER_BOUND + soff, 0); + swapper_surfaces[index] = 0; +} + +#endif /* X_BYTE_ORDER == X_BIG_ENDIAN */ +/* Prepare the engine for 2D use: no-op when info->engineMode is already 2D, otherwise write RADEON_RB2D_DC_FLUSH and queue a WAIT_UNTIL for host and 3D idle-clean. Expands to ONE statement without a trailing ';' so that "RADEON_SWITCH_TO_2D();" is safe under if/else. */ +#define RADEON_SWITCH_TO_2D() \ +do { \ + if (info->engineMode == EXA_ENGINEMODE_2D) \ + break; \ + BEGIN_ACCEL(2); \ + OUT_ACCEL_REG(RADEON_RB2D_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH); \ + OUT_ACCEL_REG(RADEON_WAIT_UNTIL, \ + RADEON_WAIT_HOST_IDLECLEAN | \ + RADEON_WAIT_3D_IDLECLEAN); \ + FINISH_ACCEL(); \ + info->engineMode = EXA_ENGINEMODE_2D; \ +} while (0) +/* Same as RADEON_SWITCH_TO_2D() for 3D use; the WAIT_UNTIL additionally includes 2D idle-clean. The caller supplies the ';'. */ +#define RADEON_SWITCH_TO_3D() \ +do { \ + if (info->engineMode == EXA_ENGINEMODE_3D) \ + break; \ + BEGIN_ACCEL(2); \ + OUT_ACCEL_REG(RADEON_RB2D_DSTCACHE_CTLSTAT, RADEON_RB2D_DC_FLUSH); \ + OUT_ACCEL_REG(RADEON_WAIT_UNTIL, \ + RADEON_WAIT_HOST_IDLECLEAN | \ + RADEON_WAIT_2D_IDLECLEAN | \ + RADEON_WAIT_3D_IDLECLEAN); \ + FINISH_ACCEL(); \ + info->engineMode = EXA_ENGINEMODE_3D; \ +} while (0) + +#define ENTER_DRAW(x) TRACE +#define LEAVE_DRAW(x) TRACE +/***********************************************************************/ + +#define ACCEL_MMIO +#define ACCEL_PREAMBLE() unsigned char *RADEONMMIO = info->MMIO +#define BEGIN_ACCEL(n) RADEONWaitForFifo(pScrn, (n)) +#define OUT_ACCEL_REG(reg, val) OUTREG(reg, val) +#define FINISH_ACCEL() + +#ifdef RENDER +#include "radeon_exa_render.c" +#endif +#include "radeon_exa_funcs.c" + +#undef ACCEL_MMIO +#undef ACCEL_PREAMBLE +#undef BEGIN_ACCEL +#undef OUT_ACCEL_REG +#undef FINISH_ACCEL + +#ifdef XF86DRI + +#define ACCEL_CP +#define ACCEL_PREAMBLE() \ + RING_LOCALS; \ + RADEONCP_REFRESH(pScrn, info) +#define BEGIN_ACCEL(n) BEGIN_RING(2*(n)) +#define OUT_ACCEL_REG(reg, val) OUT_RING_REG(reg, val) +#define FINISH_ACCEL() ADVANCE_RING() + +#define OUT_RING_F(x) OUT_RING(F_TO_DW(x)) + +#ifdef RENDER +#include "radeon_exa_render.c" +#endif +#include "radeon_exa_funcs.c" + +#endif /* XF86DRI */ + +/* + * Once screen->off_screen_base is set, this function + * allocates the remaining memory appropriately + */ +Bool RADEONSetupMemEXA 
(ScreenPtr pScreen) +{ + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + int cpp = info->CurrentLayout.pixel_bytes; + int l; + int next, screen_size; + int byteStride = pScrn->displayWidth * cpp; + + /* Need to adjust screen size for 16 line tiles, and then make it align to + * the buffer alignment requirement. NOTE(review): only the 16-line rounding is applied to screen_size here. + */ + if (info->allowColorTiling) + screen_size = RADEON_ALIGN(pScrn->virtualY, 16) * byteStride; + else + screen_size = pScrn->virtualY * byteStride; + + info->exa.card.memoryBase = info->FB + pScrn->fbOffset; + info->exa.card.memorySize = info->FbMapSize; + info->exa.card.offScreenBase = screen_size; + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Allocating from a screen of %ld kb\n", + info->exa.card.memorySize / 1024); + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "Will use %d kb for front buffer at offset 0x%08x\n", + screen_size / 1024, 0); + +#if defined(XF86DRI) + if (info->directRenderingEnabled) { + int depth_size; + + info->frontOffset = 0; + info->frontPitch = pScrn->displayWidth; + + if (info->IsPCI && info->drmMinor >= 19) { + info->pciGartSize = RADEON_PCIGART_TABLE_SIZE; + info->pciGartOffset = RADEON_ALIGN(info->exa.card.offScreenBase, + 256); + info->exa.card.offScreenBase = info->pciGartOffset + + info->pciGartSize; + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "Will use %d kb for PCI GART at offset 0x%08x\n", + RADEON_PCIGART_TABLE_SIZE / 1024, + (int)info->pciGartOffset); + } + /* Reserve a static area for the back buffer the same size as the + * visible screen. XXX: This would be better initialized in ati_dri.c + * when GLX is set up, but the offscreen memory manager's allocations + * don't last through VT switches, while the kernel's understanding of + * offscreen locations does. 
+ */ + info->backPitch = pScrn->displayWidth; + next = RADEON_ALIGN(info->exa.card.offScreenBase, RADEON_BUFFER_ALIGN); + if (!info->noBackBuffer && + next + screen_size <= info->exa.card.memorySize) + { + info->backOffset = next; + info->exa.card.offScreenBase = next + screen_size; + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "Will use %d kb for back buffer at offset 0x%08x\n", + screen_size / 1024, info->backOffset); + } + + /* Reserve the static depth buffer, and adjust pitch and height to + * handle tiling. + */ + info->depthPitch = RADEON_ALIGN(pScrn->displayWidth, 32); + depth_size = RADEON_ALIGN(pScrn->virtualY, 16) * info->depthPitch * cpp; + next = RADEON_ALIGN(info->exa.card.offScreenBase, RADEON_BUFFER_ALIGN); + if (next + depth_size <= info->exa.card.memorySize) + { + info->depthOffset = next; + info->exa.card.offScreenBase = next + depth_size; + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "Will use %d kb for depth buffer at offset 0x%08x\n", + depth_size / 1024, info->depthOffset); + } + + /* Reserve approx. half of remaining offscreen memory for local + * textures. Round down to a whole number of texture regions. 
+ */ + info->textureSize = (info->exa.card.memorySize - + info->exa.card.offScreenBase) / 2; + l = RADEONLog2(info->textureSize / RADEON_NR_TEX_REGIONS); + if (l < RADEON_LOG_TEX_GRANULARITY) + l = RADEON_LOG_TEX_GRANULARITY; + info->textureSize = (info->textureSize >> l) << l; + if (info->textureSize >= 512 * 1024) { + info->textureOffset = info->exa.card.offScreenBase; + info->exa.card.offScreenBase += info->textureSize; + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "Will use %d kb for textures at offset 0x%08x\n", + info->textureSize / 1024, info->textureOffset); + } else { + /* Minimum texture size is for 2 256x256x32bpp textures */ + info->textureSize = 0; + } + } +#endif /* XF86DRI */ + + xf86DrvMsg(pScrn->scrnIndex, X_INFO, + "Will use %ld kb for X Server offscreen at offset 0x%08lx\n", + (info->exa.card.memorySize - info->exa.card.offScreenBase) / + 1024, info->exa.card.offScreenBase); + + return TRUE; +} diff --git a/src/radeon_exa_funcs.c b/src/radeon_exa_funcs.c new file mode 100644 index 00000000..b6012839 --- /dev/null +++ b/src/radeon_exa_funcs.c @@ -0,0 +1,408 @@ +/* + * Copyright 2005 Eric Anholt + * Copyright 2005 Benjamin Herrenschmidt + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt <anholt@FreeBSD.org> + * Zack Rusin <zrusin@trolltech.com> + * Benjamin Herrenschmidt <benh@kernel.crashing.org> + * + */ + +#if defined(ACCEL_MMIO) && defined(ACCEL_CP) +#error Cannot define both MMIO and CP acceleration! +#endif + +#if !defined(UNIXCPP) || defined(ANSICPP) +#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix +#else +#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix +#endif + +#ifdef ACCEL_MMIO +#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO) +#else +#ifdef ACCEL_CP +#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP) +#else +#error No accel type defined! 
+#endif +#endif + +#include "radeon.h" +#include "atidri.h" + +#include "exa.h" + +#include "fbdevhw.h" +/* Wait until the engine is idle via the MMIO or CP variant of RADEONWaitForIdle; the marker argument is ignored. */ +static void +FUNC_NAME(RADEONSync)(ScreenPtr pScreen, int marker) +{ + TRACE; + + FUNC_NAME(RADEONWaitForIdle)(xf86Screens[pScreen->myNum]); +} +/* Program a solid fill of pPix: raster op alu (pattern column of RADEON_ROP), plane mask pm, colour fg. Returns FALSE (fallback) for 24bpp pixmaps or when the datatype or pitch/offset cannot be derived, TRUE otherwise. */ +static Bool +FUNC_NAME(RADEONPrepareSolid)(PixmapPtr pPix, int alu, Pixel pm, Pixel fg) +{ + RINFO_FROM_SCREEN(pPix->drawable.pScreen); + CARD32 datatype, dst_pitch_offset; + ACCEL_PREAMBLE(); + + TRACE; + + if (pPix->drawable.bitsPerPixel == 24) + RADEON_FALLBACK(("24bpp unsupported\n")); + if (!RADEONGetDatatypeBpp(pPix->drawable.bitsPerPixel, &datatype)) + RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n")); + if (!RADEONGetPixmapOffsetPitch(pPix, &dst_pitch_offset)) + RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch failed\n")); + + RADEON_SWITCH_TO_2D(); + + BEGIN_ACCEL(5); + OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, + RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_SOLID_COLOR | + (datatype << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP[alu].pattern | + RADEON_GMC_CLR_CMP_CNTL_DIS); + OUT_ACCEL_REG(RADEON_DP_BRUSH_FRGD_CLR, fg); + OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, pm); + OUT_ACCEL_REG(RADEON_DP_CNTL, + (RADEON_DST_X_LEFT_TO_RIGHT | RADEON_DST_Y_TOP_TO_BOTTOM)); + OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset); + FINISH_ACCEL(); + + return TRUE; +} + +/* Emit one fill rectangle with top-left corner (x1,y1), width x2 - x1 and height y2 - y1. */ +static void +FUNC_NAME(RADEONSolid)(PixmapPtr pPix, int x1, int y1, int x2, int y2) +{ + + RINFO_FROM_SCREEN(pPix->drawable.pScreen); + ACCEL_PREAMBLE(); + + TRACE; + + BEGIN_ACCEL(2); + OUT_ACCEL_REG(RADEON_DST_Y_X, (y1 << 16) | x1); + OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, ((y2 - y1) << 16) | (x2 - x1)); + FINISH_ACCEL(); +} +/* Nothing to finish after a solid fill; only emits the optional trace. */ +static void +FUNC_NAME(RADEONDoneSolid)(PixmapPtr pPix) +{ + TRACE; +} +/* Program a blit from pSrc to pDst with raster op rop and plane mask planemask; xdir/ydir are stored in info for RADEONCopy. Returns FALSE to fall back, TRUE otherwise. */ +static Bool +FUNC_NAME(RADEONPrepareCopy)(PixmapPtr pSrc, PixmapPtr pDst, + int xdir, int ydir, + int rop, + Pixel planemask) +{ + RINFO_FROM_SCREEN(pDst->drawable.pScreen); + CARD32 datatype, src_pitch_offset, dst_pitch_offset; + 
ACCEL_PREAMBLE(); + + TRACE; + /* Saved for RADEONCopy, which reads info->xdir/ydir to pick the starting corner. */ + info->xdir = xdir; + info->ydir = ydir; + + if (pDst->drawable.bitsPerPixel == 24) + RADEON_FALLBACK(("24bpp unsupported\n")); + if (!RADEONGetDatatypeBpp(pDst->drawable.bitsPerPixel, &datatype)) + RADEON_FALLBACK(("RADEONGetDatatypeBpp failed\n")); + if (!RADEONGetPixmapOffsetPitch(pSrc, &src_pitch_offset)) + RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch source failed\n")); + if (!RADEONGetPixmapOffsetPitch(pDst, &dst_pitch_offset)) + RADEON_FALLBACK(("RADEONGetPixmapOffsetPitch dest failed\n")); + + RADEON_SWITCH_TO_2D(); + + BEGIN_ACCEL(5); + OUT_ACCEL_REG(RADEON_DP_GUI_MASTER_CNTL, + RADEON_GMC_DST_PITCH_OFFSET_CNTL | + RADEON_GMC_SRC_PITCH_OFFSET_CNTL | + RADEON_GMC_BRUSH_NONE | + (datatype << 8) | + RADEON_GMC_SRC_DATATYPE_COLOR | + RADEON_ROP[rop].rop | + RADEON_DP_SRC_SOURCE_MEMORY | + RADEON_GMC_CLR_CMP_CNTL_DIS); + OUT_ACCEL_REG(RADEON_DP_WRITE_MASK, planemask); + OUT_ACCEL_REG(RADEON_DP_CNTL, + ((xdir >= 0 ? RADEON_DST_X_LEFT_TO_RIGHT : 0) | + (ydir >= 0 ? 
RADEON_DST_Y_TOP_TO_BOTTOM : 0))); + OUT_ACCEL_REG(RADEON_DST_PITCH_OFFSET, dst_pitch_offset); + OUT_ACCEL_REG(RADEON_SRC_PITCH_OFFSET, src_pitch_offset); + FINISH_ACCEL(); + + return TRUE; +} + +static void +FUNC_NAME(RADEONCopy)(PixmapPtr pDst, + int srcX, int srcY, + int dstX, int dstY, + int w, int h) +{ + + RINFO_FROM_SCREEN(pDst->drawable.pScreen); + ACCEL_PREAMBLE(); + + TRACE; + + if (info->xdir < 0) { + srcX += w - 1; + dstX += w - 1; + } + if (info->ydir < 0) { + srcY += h - 1; + dstY += h - 1; + } + + BEGIN_ACCEL(3); + + OUT_ACCEL_REG(RADEON_SRC_Y_X, (srcY << 16) | srcX); + OUT_ACCEL_REG(RADEON_DST_Y_X, (dstY << 16) | dstX); + OUT_ACCEL_REG(RADEON_DST_HEIGHT_WIDTH, (h << 16) | w); + + FINISH_ACCEL(); +} + +static void +FUNC_NAME(RADEONDoneCopy)(PixmapPtr pDst) +{ + TRACE; +} + +static Bool +FUNC_NAME(RADEONUploadToScreen)(PixmapPtr pDst, char *src, int src_pitch) +{ +#if X_BYTE_ORDER == X_BIG_ENDIAN || defined(ACCEL_CP) + RINFO_FROM_SCREEN(pDst->drawable.pScreen); +#endif + CARD8 *dst = pDst->devPrivate.ptr; + unsigned int dst_pitch = exaGetPixmapPitch(pDst); + unsigned int w = pDst->drawable.width; + unsigned int h = pDst->drawable.height; + unsigned int bpp = pDst->drawable.bitsPerPixel; +#ifdef ACCEL_CP + unsigned int hpass; + CARD32 buf_pitch; +#endif +#if X_BYTE_ORDER == X_BIG_ENDIAN + unsigned char *RADEONMMIO = info->MMIO; + unsigned int swapper = info->ModeReg.surface_cntl & + ~(RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP | + RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP); +#endif + + TRACE; + +#ifdef ACCEL_CP + if (info->directRenderingEnabled) { + CARD8 *buf; + int cpp = bpp / 8; + ACCEL_PREAMBLE(); + + RADEON_SWITCH_TO_2D(); + while ((buf = RADEONHostDataBlit(pScrn, + cpp, w, dst_pitch, &buf_pitch, + &dst, &h, &hpass)) != 0) { + RADEONHostDataBlitCopyPass(pScrn, cpp, buf, (unsigned char *)src, + hpass, buf_pitch, src_pitch); + src += hpass * src_pitch; + } + + exaMarkSync(pDst->drawable.pScreen); + return TRUE; + 
} +#endif + + /* Do we need that sync here ? probably not .... */ + exaWaitSync(pDst->drawable.pScreen); + +#if X_BYTE_ORDER == X_BIG_ENDIAN + switch(bpp) { + case 15: + case 16: + swapper |= RADEON_NONSURF_AP0_SWP_16BPP + | RADEON_NONSURF_AP1_SWP_16BPP; + break; + case 24: + case 32: + swapper |= RADEON_NONSURF_AP0_SWP_32BPP + | RADEON_NONSURF_AP1_SWP_32BPP; + break; + } + OUTREG(RADEON_SURFACE_CNTL, swapper); +#endif + w *= bpp / 8; + + while (h--) { + memcpy(dst, src, w); + src += src_pitch; + dst += dst_pitch; + } + +#if X_BYTE_ORDER == X_BIG_ENDIAN + /* restore byte swapping */ + OUTREG(RADEON_SURFACE_CNTL, info->ModeReg.surface_cntl); +#endif + + return TRUE; +} + +static Bool +FUNC_NAME(RADEONDownloadFromScreen)(PixmapPtr pSrc, int x, int y, int w, int h, + char *dst, int dst_pitch) +{ +#if X_BYTE_ORDER == X_BIG_ENDIAN + RINFO_FROM_SCREEN(pSrc->drawable.pScreen); + unsigned char *RADEONMMIO = info->MMIO; + unsigned int swapper = info->ModeReg.surface_cntl & + ~(RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP | + RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP); +#endif + unsigned char *src = pSrc->devPrivate.ptr; + int src_pitch = exaGetPixmapPitch(pSrc); + int bpp = pSrc->drawable.bitsPerPixel; + + TRACE; + + /* + * This is currently done without DMA until I have ironed out the + * various endian issues with R300 among others + */ + exaWaitSync(pSrc->drawable.pScreen); + +#if X_BYTE_ORDER == X_BIG_ENDIAN + switch(bpp) { + case 15: + case 16: + swapper |= RADEON_NONSURF_AP0_SWP_16BPP + | RADEON_NONSURF_AP1_SWP_16BPP; + break; + case 24: + case 32: + swapper |= RADEON_NONSURF_AP0_SWP_32BPP + | RADEON_NONSURF_AP1_SWP_32BPP; + break; + } + OUTREG(RADEON_SURFACE_CNTL, swapper); +#endif + + src += (x * bpp / 8) + (y * src_pitch); + w *= bpp / 8; + + while (h--) { + memcpy(dst, src, w); + src += src_pitch; + dst += dst_pitch; + } + +#if X_BYTE_ORDER == X_BIG_ENDIAN + /* restore byte swapping */ + OUTREG(RADEON_SURFACE_CNTL, 
info->ModeReg.surface_cntl); +#endif + + return TRUE; +} +/* Fill info->exa with the MMIO or CP hooks (selected by FUNC_NAME), the pixmap alignment and coordinate limits and, when enabled, the Render composite hooks; then initialise the engine and register with EXA. Returns FALSE if exaDriverInit() fails, TRUE otherwise. */ +Bool FUNC_NAME(RADEONDrawInit)(ScreenPtr pScreen) +{ + RINFO_FROM_SCREEN(pScreen); + + memset(&info->exa.accel, 0, sizeof(ExaAccelInfoRec)); + + info->exa.accel.PrepareSolid = FUNC_NAME(RADEONPrepareSolid); + info->exa.accel.Solid = FUNC_NAME(RADEONSolid); + info->exa.accel.DoneSolid = FUNC_NAME(RADEONDoneSolid); + + info->exa.accel.PrepareCopy = FUNC_NAME(RADEONPrepareCopy); + info->exa.accel.Copy = FUNC_NAME(RADEONCopy); + info->exa.accel.DoneCopy = FUNC_NAME(RADEONDoneCopy); + + info->exa.accel.WaitMarker = FUNC_NAME(RADEONSync); + info->exa.accel.UploadToScreen = FUNC_NAME(RADEONUploadToScreen); + info->exa.accel.DownloadFromScreen = FUNC_NAME(RADEONDownloadFromScreen); + +#if X_BYTE_ORDER == X_BIG_ENDIAN + info->exa.accel.PrepareAccess = RADEONPrepareAccess; + info->exa.accel.FinishAccess = RADEONFinishAccess; +#endif /* X_BYTE_ORDER == X_BIG_ENDIAN */ + + info->exa.card.flags = EXA_OFFSCREEN_PIXMAPS; + info->exa.card.pixmapOffsetAlign = RADEON_BUFFER_ALIGN + 1; + info->exa.card.pixmapPitchAlign = 64; + + info->exa.card.maxX = 2047; + info->exa.card.maxY = 2047; +#ifdef RENDER /* radeon_exa.c includes radeon_exa_render.c only under RENDER; presumably that file provides the Composite hooks used below */ + if (info->RenderAccel) { + if (info->ChipFamily >= CHIP_FAMILY_R300) { + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration " + "unsupported on R300 type cards and newer.\n"); + } else if ((info->ChipFamily == CHIP_FAMILY_RV250) || + (info->ChipFamily == CHIP_FAMILY_RV280) || + (info->ChipFamily == CHIP_FAMILY_RS300) || + (info->ChipFamily == CHIP_FAMILY_R200)) { + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration " + "enabled for R200 type cards.\n"); + info->exa.accel.CheckComposite = R200CheckComposite; + info->exa.accel.PrepareComposite = + FUNC_NAME(R200PrepareComposite); + info->exa.accel.Composite = FUNC_NAME(RadeonComposite); + info->exa.accel.DoneComposite = RadeonDoneComposite; + } else { + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Render acceleration " + "enabled for R100 type cards.\n"); + 
info->exa.accel.CheckComposite = R100CheckComposite; + info->exa.accel.PrepareComposite = + FUNC_NAME(R100PrepareComposite); + info->exa.accel.Composite = FUNC_NAME(RadeonComposite); + info->exa.accel.DoneComposite = RadeonDoneComposite; + } + } +#endif /* RENDER */ + RADEONEngineInit(pScrn); + + if (!exaDriverInit(pScreen, &info->exa)) { + return FALSE; + } + exaMarkSync(pScreen); + + return TRUE; +} + +#undef FUNC_NAME diff --git a/src/radeon_exa_render.c b/src/radeon_exa_render.c new file mode 100644 index 00000000..d4beb874 --- /dev/null +++ b/src/radeon_exa_render.c @@ -0,0 +1,799 @@ +/* + * Copyright 2005 Eric Anholt + * Copyright 2005 Benjamin Herrenschmidt + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * Authors: + * Eric Anholt <anholt@FreeBSD.org> + * Zack Rusin <zrusin@trolltech.com> + * Benjamin Herrenschmidt <benh@kernel.crashing.org> + * + */ + +#if defined(ACCEL_MMIO) && defined(ACCEL_CP) +#error Cannot define both MMIO and CP acceleration! +#endif + +#if !defined(UNIXCPP) || defined(ANSICPP) +#define FUNC_NAME_CAT(prefix,suffix) prefix##suffix +#else +#define FUNC_NAME_CAT(prefix,suffix) prefix/**/suffix +#endif + +#ifdef ACCEL_MMIO +#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,MMIO) +#else +#ifdef ACCEL_CP +#define FUNC_NAME(prefix) FUNC_NAME_CAT(prefix,CP) +#else +#error No accel type defined! +#endif +#endif + +#ifndef ACCEL_CP +#define ONLY_ONCE +#endif + +/* Only include the following (generic) bits once. */ +#ifdef ONLY_ONCE +static Bool is_transform[2]; +static PictTransform *transform[2]; + +struct blendinfo { + Bool dst_alpha; + Bool src_alpha; + CARD32 blend_cntl; +}; + +static struct blendinfo RadeonBlendOp[] = { + /* Clear */ + {0, 0, RADEON_SRC_BLEND_GL_ZERO | RADEON_DST_BLEND_GL_ZERO}, + /* Src */ + {0, 0, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ZERO}, + /* Dst */ + {0, 0, RADEON_SRC_BLEND_GL_ZERO | RADEON_DST_BLEND_GL_ONE}, + /* Over */ + {0, 1, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA}, + /* OverReverse */ + {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ONE}, + /* In */ + {1, 0, RADEON_SRC_BLEND_GL_DST_ALPHA | RADEON_DST_BLEND_GL_ZERO}, + /* InReverse */ + {0, 1, RADEON_SRC_BLEND_GL_ZERO | RADEON_DST_BLEND_GL_SRC_ALPHA}, + /* Out */ + {1, 0, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_ZERO}, + /* OutReverse */ + {0, 1, RADEON_SRC_BLEND_GL_ZERO | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA}, + /* Atop */ + {1, 1, RADEON_SRC_BLEND_GL_DST_ALPHA | RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA}, + /* AtopReverse */ + {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | RADEON_DST_BLEND_GL_SRC_ALPHA}, + /* Xor */ + {1, 1, RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA | 
RADEON_DST_BLEND_GL_ONE_MINUS_SRC_ALPHA}, + /* Add */ + {0, 0, RADEON_SRC_BLEND_GL_ONE | RADEON_DST_BLEND_GL_ONE}, +}; + +struct formatinfo { + int fmt; + Bool byte_swap; + CARD32 card_fmt; +}; + +/* Note on texture formats: + * TXFORMAT_Y8 expands to (Y,Y,Y,1). TXFORMAT_I8 expands to (I,I,I,I) + */ +static struct formatinfo R100TexFormats[] = { + {PICT_a8r8g8b8, 0, RADEON_TXFORMAT_ARGB8888 | RADEON_TXFORMAT_ALPHA_IN_MAP}, + {PICT_x8r8g8b8, 0, RADEON_TXFORMAT_ARGB8888}, + {PICT_a8b8g8r8, 1, RADEON_TXFORMAT_RGBA8888 | RADEON_TXFORMAT_ALPHA_IN_MAP}, + {PICT_x8b8g8r8, 1, RADEON_TXFORMAT_RGBA8888}, + {PICT_r5g6b5, 0, RADEON_TXFORMAT_RGB565}, + {PICT_a1r5g5b5, 0, RADEON_TXFORMAT_ARGB1555 | RADEON_TXFORMAT_ALPHA_IN_MAP}, + {PICT_x1r5g5b5, 0, RADEON_TXFORMAT_ARGB1555}, + {PICT_a8, 0, RADEON_TXFORMAT_I8 | RADEON_TXFORMAT_ALPHA_IN_MAP}, +}; + +static struct formatinfo R200TexFormats[] = { + {PICT_a8r8g8b8, 0, R200_TXFORMAT_ARGB8888 | R200_TXFORMAT_ALPHA_IN_MAP}, + {PICT_x8r8g8b8, 0, R200_TXFORMAT_ARGB8888}, + {PICT_a8r8g8b8, 1, R200_TXFORMAT_RGBA8888 | R200_TXFORMAT_ALPHA_IN_MAP}, + {PICT_x8r8g8b8, 1, R200_TXFORMAT_RGBA8888}, + {PICT_r5g6b5, 0, R200_TXFORMAT_RGB565}, + {PICT_a1r5g5b5, 0, R200_TXFORMAT_ARGB1555 | R200_TXFORMAT_ALPHA_IN_MAP}, + {PICT_x1r5g5b5, 0, R200_TXFORMAT_ARGB1555}, + {PICT_a8, 0, R200_TXFORMAT_I8 | R200_TXFORMAT_ALPHA_IN_MAP}, +}; + +/* Common Radeon setup code */ + +static Bool RADEONGetDestFormat(PicturePtr pDstPicture, CARD32 *dst_format) +{ + switch (pDstPicture->format) { + case PICT_a8r8g8b8: + case PICT_x8r8g8b8: + *dst_format = RADEON_COLOR_FORMAT_ARGB8888; + break; + case PICT_r5g6b5: + *dst_format = RADEON_COLOR_FORMAT_RGB565; + break; + case PICT_a1r5g5b5: + case PICT_x1r5g5b5: + *dst_format = RADEON_COLOR_FORMAT_ARGB1555; + break; + case PICT_a8: + *dst_format = RADEON_COLOR_FORMAT_RGB8; + break; + default: + RADEON_FALLBACK(("Unsupported dest format 0x%x\n", + (int)pDstPicture->format)); + } + + return TRUE; +} +static CARD32 
RADEONGetBlendCntl(int op, CARD32 dst_format) +{ + CARD32 blendcntl = RadeonBlendOp[op].blend_cntl; + /* If there's no dst alpha channel, adjust the blend op so that we'll treat + * it as always 1. + */ + if (PICT_FORMAT_A(dst_format) == 0 && RadeonBlendOp[op].dst_alpha) { + if ((blendcntl & RADEON_SRC_BLEND_MASK) == + RADEON_SRC_BLEND_GL_DST_ALPHA) { + blendcntl = (blendcntl & ~RADEON_SRC_BLEND_MASK) | + RADEON_SRC_BLEND_GL_ONE; + } else if ((blendcntl & RADEON_SRC_BLEND_MASK) == + RADEON_SRC_BLEND_GL_ONE_MINUS_DST_ALPHA) { + blendcntl = (blendcntl & ~RADEON_SRC_BLEND_MASK) | + RADEON_SRC_BLEND_GL_ZERO; + } + } + + return blendcntl; +} + +union intfloat { + float f; + CARD32 i; +}; + +/* R100-specific code */ + +static Bool R100CheckCompositeTexture(PicturePtr pPict, int unit) +{ + int w = pPict->pDrawable->width; + int h = pPict->pDrawable->height; + int i; + + if ((w > 0x7ff) || (h > 0x7ff)) + RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h)); + + for (i = 0; i < sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++) { + if (R100TexFormats[i].fmt == pPict->format) + break; + } + if (i == sizeof(R100TexFormats) / sizeof(R100TexFormats[0])) + RADEON_FALLBACK(("Unsupported picture format 0x%x\n", + (int)pPict->format)); + + if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0)) + RADEON_FALLBACK(("NPOT repeat unsupported (%dx%d)\n", w, h)); + + if (pPict->filter != PictFilterNearest && + pPict->filter != PictFilterBilinear) + { + RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter)); + } + + return TRUE; +} + +#endif /* ONLY_ONCE */ + +static Bool FUNC_NAME(R100TextureSetup)(PicturePtr pPict, PixmapPtr pPix, + int unit) +{ + RINFO_FROM_SCREEN(pPix->drawable.pScreen); + CARD32 txfilter, txformat, txoffset, txpitch; + int w = pPict->pDrawable->width; + int h = pPict->pDrawable->height; + int i; + ACCEL_PREAMBLE(); + + txpitch = exaGetPixmapPitch(pPix); + txoffset = exaGetPixmapOffset(pPix) + info->fbLocation; + + for (i = 0; i < 
sizeof(R100TexFormats) / sizeof(R100TexFormats[0]); i++) + { + if (R100TexFormats[i].fmt == pPict->format) + break; + } + txformat = R100TexFormats[i].card_fmt; + if (R100TexFormats[i].byte_swap) + txoffset |= RADEON_TXO_ENDIAN_BYTE_SWAP; + if (RADEONPixmapIsColortiled(pPix)) + txoffset |= RADEON_TXO_MACRO_TILE; + + if (pPict->repeat) { + txformat |= RADEONLog2(w) << RADEON_TXFORMAT_WIDTH_SHIFT; + txformat |= RADEONLog2(h) << RADEON_TXFORMAT_HEIGHT_SHIFT; + } else + txformat |= RADEON_TXFORMAT_NON_POWER2; + txformat |= unit << 24; /* RADEON_TXFORMAT_ST_ROUTE_STQX */ + + if ((txoffset & 0x1f) != 0) + RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset)); + if ((txpitch & 0x1f) != 0) + RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch)); + + switch (pPict->filter) { + case PictFilterNearest: + txfilter = (RADEON_MAG_FILTER_NEAREST | RADEON_MIN_FILTER_NEAREST); + break; + case PictFilterBilinear: + txfilter = (RADEON_MAG_FILTER_LINEAR | RADEON_MIN_FILTER_LINEAR); + break; + default: + RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); + } + + BEGIN_ACCEL(5); + if (unit == 0) { + OUT_ACCEL_REG(RADEON_PP_TXFILTER_0, txfilter); + OUT_ACCEL_REG(RADEON_PP_TXFORMAT_0, txformat); + OUT_ACCEL_REG(RADEON_PP_TXOFFSET_0, txoffset); + OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_0, + (pPix->drawable.width - 1) | + ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT)); + OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_0, txpitch - 32); + } else { + OUT_ACCEL_REG(RADEON_PP_TXFILTER_1, txfilter); + OUT_ACCEL_REG(RADEON_PP_TXFORMAT_1, txformat); + OUT_ACCEL_REG(RADEON_PP_TXOFFSET_1, txoffset); + OUT_ACCEL_REG(RADEON_PP_TEX_SIZE_1, + (pPix->drawable.width - 1) | + ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT)); + OUT_ACCEL_REG(RADEON_PP_TEX_PITCH_1, txpitch - 32); + } + FINISH_ACCEL(); + + if (pPict->transform != 0) { + is_transform[unit] = TRUE; + transform[unit] = pPict->transform; + } else { + is_transform[unit] = FALSE; + } + + return TRUE; +} + +#ifdef ONLY_ONCE +static 
Bool R100CheckComposite(int op, PicturePtr pSrcPicture, + PicturePtr pMaskPicture, PicturePtr pDstPicture) +{ + CARD32 tmp1; + + /* Check for unsupported compositing operations. */ + if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0])) + RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op)); + if (pMaskPicture != NULL && pMaskPicture->componentAlpha && + RadeonBlendOp[op].src_alpha) + { + RADEON_FALLBACK(("Component alpha not supported with source " + "alpha blending.\n")); + } + if (pDstPicture->pDrawable->width >= (1 << 11) || + pDstPicture->pDrawable->height >= (1 << 11)) + { + RADEON_FALLBACK(("Dest w/h too large (%d,%d).\n", + pDstPicture->pDrawable->width, + pDstPicture->pDrawable->height)); + } + + if (!R100CheckCompositeTexture(pSrcPicture, 0)) + return FALSE; + if (pMaskPicture != NULL && !R100CheckCompositeTexture(pMaskPicture, 1)) + return FALSE; + + if (pDstPicture->componentAlpha) + return FALSE; + + if (!RADEONGetDestFormat(pDstPicture, &tmp1)) + return FALSE; + + return TRUE; +} +#endif /* ONLY_ONCE */ + +static Bool FUNC_NAME(R100PrepareComposite)(int op, + PicturePtr pSrcPicture, + PicturePtr pMaskPicture, + PicturePtr pDstPicture, + PixmapPtr pSrc, + PixmapPtr pMask, + PixmapPtr pDst) +{ + RINFO_FROM_SCREEN(pDst->drawable.pScreen); + CARD32 dst_format, dst_offset, dst_pitch, colorpitch; + CARD32 pp_cntl, blendcntl, cblend, ablend; + int pixel_shift; + ACCEL_PREAMBLE(); + + TRACE; + + if (!info->XInited3D) + RADEONInit3DEngine(pScrn); + + RADEONGetDestFormat(pDstPicture, &dst_format); + pixel_shift = pDst->drawable.bitsPerPixel >> 4; + + dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation; + dst_pitch = exaGetPixmapPitch(pDst); + colorpitch = dst_pitch >> pixel_shift; + if (RADEONPixmapIsColortiled(pDst)) + colorpitch |= RADEON_COLOR_TILE_ENABLE; + + dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation; + dst_pitch = exaGetPixmapPitch(pDst); + if ((dst_offset & 0x0f) != 0) + RADEON_FALLBACK(("Bad destination offset 0x%x\n", 
(int)dst_offset)); + if (((dst_pitch >> pixel_shift) & 0x7) != 0) + RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch)); + + if (!FUNC_NAME(R100TextureSetup)(pSrcPicture, pSrc, 0)) + return FALSE; + pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE; + + if (pMask != NULL) { + if (!FUNC_NAME(R100TextureSetup)(pMaskPicture, pMask, 1)) + return FALSE; + pp_cntl |= RADEON_TEX_1_ENABLE; + } else { + is_transform[1] = FALSE; + } + + RADEON_SWITCH_TO_3D(); + + BEGIN_ACCEL(8); + OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl); + OUT_ACCEL_REG(RADEON_RB3D_CNTL, dst_format | RADEON_ALPHA_BLEND_ENABLE); + OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset); + OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch); + + /* IN operator: Multiply src by mask components or mask alpha. + * BLEND_CTL_ADD is A * B + C. + * If a picture is a8, we have to explicitly zero its color values. + * If the destination is a8, we have to route the alpha to red, I think. + */ + cblend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_COLOR_ARG_C_ZERO; + ablend = RADEON_BLEND_CTL_ADD | RADEON_CLAMP_TX | RADEON_ALPHA_ARG_C_ZERO; + + if (pDstPicture->format == PICT_a8) + cblend |= RADEON_COLOR_ARG_A_T0_ALPHA; + else if (pSrcPicture->format == PICT_a8) + cblend |= RADEON_COLOR_ARG_A_ZERO; + else + cblend |= RADEON_COLOR_ARG_A_T0_COLOR; + ablend |= RADEON_ALPHA_ARG_A_T0_ALPHA; + + if (pMask) { + if (pMaskPicture->componentAlpha && + pDstPicture->format != PICT_a8) + cblend |= RADEON_COLOR_ARG_B_T1_COLOR; + else + cblend |= RADEON_COLOR_ARG_B_T1_ALPHA; + ablend |= RADEON_ALPHA_ARG_B_T1_ALPHA; + } else { + cblend |= RADEON_COLOR_ARG_B_ZERO | RADEON_COMP_ARG_B; + ablend |= RADEON_ALPHA_ARG_B_ZERO | RADEON_COMP_ARG_B; + } + + OUT_ACCEL_REG(RADEON_PP_TXCBLEND_0, cblend); + OUT_ACCEL_REG(RADEON_PP_TXABLEND_0, ablend); + OUT_ACCEL_REG(RADEON_SE_VTX_FMT, RADEON_SE_VTX_FMT_XY | + RADEON_SE_VTX_FMT_ST0 | + RADEON_SE_VTX_FMT_ST1); + /* Op operator. 
*/ + blendcntl = RADEONGetBlendCntl(op, pDstPicture->format); + OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl); + FINISH_ACCEL(); + + return TRUE; +} + +#ifdef ONLY_ONCE + +static Bool R200CheckCompositeTexture(PicturePtr pPict, int unit) +{ + int w = pPict->pDrawable->width; + int h = pPict->pDrawable->height; + int i; + + if ((w > 0x7ff) || (h > 0x7ff)) + RADEON_FALLBACK(("Picture w/h too large (%dx%d)\n", w, h)); + + for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++) + { + if (R200TexFormats[i].fmt == pPict->format) + break; + } + if (i == sizeof(R200TexFormats) / sizeof(R200TexFormats[0])) + RADEON_FALLBACK(("Unsupported picture format 0x%x\n", + (int)pPict->format)); + + if (pPict->repeat && ((w & (w - 1)) != 0 || (h & (h - 1)) != 0)) + RADEON_FALLBACK(("NPOT repeat unsupported (%dx%d)\n", w, h)); + + if (pPict->filter != PictFilterNearest && + pPict->filter != PictFilterBilinear) + RADEON_FALLBACK(("Unsupported filter 0x%x\n", pPict->filter)); + + return TRUE; +} + +#endif /* ONLY_ONCE */ + +static Bool FUNC_NAME(R200TextureSetup)(PicturePtr pPict, PixmapPtr pPix, + int unit) +{ + RINFO_FROM_SCREEN(pPix->drawable.pScreen); + CARD32 txfilter, txformat, txoffset, txpitch; + int w = pPict->pDrawable->width; + int h = pPict->pDrawable->height; + int i; + ACCEL_PREAMBLE(); + + txpitch = exaGetPixmapPitch(pPix); + txoffset = exaGetPixmapOffset(pPix) + info->fbLocation; + + for (i = 0; i < sizeof(R200TexFormats) / sizeof(R200TexFormats[0]); i++) + { + if (R200TexFormats[i].fmt == pPict->format) + break; + } + txformat = R200TexFormats[i].card_fmt; + if (R200TexFormats[i].byte_swap) + txoffset |= R200_TXO_ENDIAN_BYTE_SWAP; + if (RADEONPixmapIsColortiled(pPix)) + txoffset |= R200_TXO_MACRO_TILE; + + if (pPict->repeat) { + txformat |= RADEONLog2(w) << R200_TXFORMAT_WIDTH_SHIFT; + txformat |= RADEONLog2(h) << R200_TXFORMAT_HEIGHT_SHIFT; + } else + txformat |= R200_TXFORMAT_NON_POWER2; + txformat |= unit << R200_TXFORMAT_ST_ROUTE_SHIFT; + + if 
((txoffset & 0x1f) != 0) + RADEON_FALLBACK(("Bad texture offset 0x%x\n", (int)txoffset)); + if ((txpitch & 0x1f) != 0) + RADEON_FALLBACK(("Bad texture pitch 0x%x\n", (int)txpitch)); + + switch (pPict->filter) { + case PictFilterNearest: + txfilter = (R200_MAG_FILTER_NEAREST | + R200_MIN_FILTER_NEAREST); + break; + case PictFilterBilinear: + txfilter = (R200_MAG_FILTER_LINEAR | + R200_MIN_FILTER_LINEAR); + break; + default: + RADEON_FALLBACK(("Bad filter 0x%x\n", pPict->filter)); + } + + BEGIN_ACCEL(6); + if (unit == 0) { + OUT_ACCEL_REG(R200_PP_TXFILTER_0, txfilter); + OUT_ACCEL_REG(R200_PP_TXFORMAT_0, txformat); + OUT_ACCEL_REG(R200_PP_TXFORMAT_X_0, 0); + OUT_ACCEL_REG(R200_PP_TXSIZE_0, (pPix->drawable.width - 1) | + ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT)); + OUT_ACCEL_REG(R200_PP_TXPITCH_0, txpitch - 32); + OUT_ACCEL_REG(R200_PP_TXOFFSET_0, txoffset); + } else { + OUT_ACCEL_REG(R200_PP_TXFILTER_1, txfilter); + OUT_ACCEL_REG(R200_PP_TXFORMAT_1, txformat); + OUT_ACCEL_REG(R200_PP_TXFORMAT_X_1, 0); + OUT_ACCEL_REG(R200_PP_TXSIZE_1, (pPix->drawable.width - 1) | + ((pPix->drawable.height - 1) << RADEON_TEX_VSIZE_SHIFT)); + OUT_ACCEL_REG(R200_PP_TXPITCH_1, txpitch - 32); + OUT_ACCEL_REG(R200_PP_TXOFFSET_1, txoffset); + } + FINISH_ACCEL(); + + if (pPict->transform != 0) { + is_transform[unit] = TRUE; + transform[unit] = pPict->transform; + } else { + is_transform[unit] = FALSE; + } + + return TRUE; +} + +#ifdef ONLY_ONCE +static Bool R200CheckComposite(int op, PicturePtr pSrcPicture, PicturePtr pMaskPicture, + PicturePtr pDstPicture) +{ + CARD32 tmp1; + + TRACE; + + /* Check for unsupported compositing operations. 
*/ + if (op >= sizeof(RadeonBlendOp) / sizeof(RadeonBlendOp[0])) + RADEON_FALLBACK(("Unsupported Composite op 0x%x\n", op)); + if (pMaskPicture != NULL && pMaskPicture->componentAlpha && + RadeonBlendOp[op].src_alpha) + RADEON_FALLBACK(("Component alpha not supported with source " + "alpha blending.\n")); + + if (!R200CheckCompositeTexture(pSrcPicture, 0)) + return FALSE; + if (pMaskPicture != NULL && !R200CheckCompositeTexture(pMaskPicture, 1)) + return FALSE; + + if (!RADEONGetDestFormat(pDstPicture, &tmp1)) + return FALSE; + + return TRUE; +} +#endif /* ONLY_ONCE */ + +static Bool FUNC_NAME(R200PrepareComposite)(int op, PicturePtr pSrcPicture, + PicturePtr pMaskPicture, PicturePtr pDstPicture, + PixmapPtr pSrc, PixmapPtr pMask, PixmapPtr pDst) +{ + RINFO_FROM_SCREEN(pDst->drawable.pScreen); + CARD32 dst_format, dst_offset, dst_pitch; + CARD32 pp_cntl, blendcntl, cblend, ablend, colorpitch; + int pixel_shift; + ACCEL_PREAMBLE(); + + TRACE; + + if (!info->XInited3D) + RADEONInit3DEngine(pScrn); + + RADEONGetDestFormat(pDstPicture, &dst_format); + pixel_shift = pDst->drawable.bitsPerPixel >> 4; + + dst_offset = exaGetPixmapOffset(pDst) + info->fbLocation; + dst_pitch = exaGetPixmapPitch(pDst); + colorpitch = dst_pitch >> pixel_shift; + if (RADEONPixmapIsColortiled(pDst)) + colorpitch |= RADEON_COLOR_TILE_ENABLE; + + if ((dst_offset & 0x0f) != 0) + RADEON_FALLBACK(("Bad destination offset 0x%x\n", (int)dst_offset)); + if (((dst_pitch >> pixel_shift) & 0x7) != 0) + RADEON_FALLBACK(("Bad destination pitch 0x%x\n", (int)dst_pitch)); + + if (!FUNC_NAME(R200TextureSetup)(pSrcPicture, pSrc, 0)) + return FALSE; + pp_cntl = RADEON_TEX_0_ENABLE | RADEON_TEX_BLEND_0_ENABLE; + + if (pMask != NULL) { + if (!FUNC_NAME(R200TextureSetup)(pMaskPicture, pMask, 1)) + return FALSE; + pp_cntl |= RADEON_TEX_1_ENABLE; + } else { + is_transform[1] = FALSE; + } + + RADEON_SWITCH_TO_3D(); + + BEGIN_ACCEL(11); + + OUT_ACCEL_REG(RADEON_PP_CNTL, pp_cntl); + OUT_ACCEL_REG(RADEON_RB3D_CNTL, 
dst_format | RADEON_ALPHA_BLEND_ENABLE); + OUT_ACCEL_REG(RADEON_RB3D_COLOROFFSET, dst_offset); + + OUT_ACCEL_REG(R200_SE_VTX_FMT_0, R200_VTX_XY); + OUT_ACCEL_REG(R200_SE_VTX_FMT_1, + (2 << R200_VTX_TEX0_COMP_CNT_SHIFT) | + (2 << R200_VTX_TEX1_COMP_CNT_SHIFT)); + + OUT_ACCEL_REG(RADEON_RB3D_COLORPITCH, colorpitch); + + /* IN operator: Multiply src by mask components or mask alpha. + * BLEND_CTL_ADD is A * B + C. + * If a picture is a8, we have to explicitly zero its color values. + * If the destination is a8, we have to route the alpha to red, I think. + */ + cblend = R200_TXC_OP_MADD | R200_TXC_ARG_C_ZERO; + ablend = R200_TXA_OP_MADD | R200_TXA_ARG_C_ZERO; + + if (pDstPicture->format == PICT_a8) + cblend |= R200_TXC_ARG_A_R0_ALPHA; + else if (pSrcPicture->format == PICT_a8) + cblend |= R200_TXC_ARG_A_ZERO; + else + cblend |= R200_TXC_ARG_A_R0_COLOR; + ablend |= R200_TXA_ARG_A_R0_ALPHA; + + if (pMask) { + if (pMaskPicture->componentAlpha && + pDstPicture->format != PICT_a8) + cblend |= R200_TXC_ARG_B_R1_COLOR; + else + cblend |= R200_TXC_ARG_B_R1_ALPHA; + ablend |= R200_TXA_ARG_B_R1_ALPHA; + } else { + cblend |= R200_TXC_ARG_B_ZERO | R200_TXC_COMP_ARG_B; + ablend |= R200_TXA_ARG_B_ZERO | R200_TXA_COMP_ARG_B; + } + + OUT_ACCEL_REG(R200_PP_TXCBLEND_0, cblend); + OUT_ACCEL_REG(R200_PP_TXCBLEND2_0, + R200_TXC_CLAMP_0_1 | R200_TXC_OUTPUT_REG_R0); + OUT_ACCEL_REG(R200_PP_TXABLEND_0, ablend); + OUT_ACCEL_REG(R200_PP_TXABLEND2_0, + R200_TXA_CLAMP_0_1 | R200_TXA_OUTPUT_REG_R0); + + /* Op operator. 
*/ + blendcntl = RADEONGetBlendCntl(op, pDstPicture->format); + OUT_ACCEL_REG(RADEON_RB3D_BLENDCNTL, blendcntl); + FINISH_ACCEL(); + + return TRUE; +} + +#ifdef ACCEL_CP + +#define VTX_DWORD_COUNT 6 + +#define VTX_OUT(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ +do { \ + OUT_RING_F(_dstX); \ + OUT_RING_F(_dstY); \ + OUT_RING_F(_srcX); \ + OUT_RING_F(_srcY); \ + OUT_RING_F(_maskX); \ + OUT_RING_F(_maskY); \ +} while (0) + +#else /* ACCEL_CP */ + +#define VTX_REG_COUNT 6 + +#define VTX_OUT(_dstX, _dstY, _srcX, _srcY, _maskX, _maskY) \ +do { \ + OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, _dstX); \ + OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, _dstY); \ + OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, _srcX); \ + OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, _srcY); \ + OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, _maskX); \ + OUT_ACCEL_REG(RADEON_SE_PORT_DATA0, _maskY); \ +} while (0) + +#endif /* !ACCEL_CP */ + +static void FUNC_NAME(RadeonComposite)(PixmapPtr pDst, + int srcX, int srcY, + int maskX, int maskY, + int dstX, int dstY, + int w, int h) +{ + RINFO_FROM_SCREEN(pDst->drawable.pScreen); + int srcXend, srcYend, maskXend, maskYend; + PictVector v; + ACCEL_PREAMBLE(); + + ENTER_DRAW(0); + + /*ErrorF("RadeonComposite (%d,%d) (%d,%d) (%d,%d) (%d,%d)\n", + srcX, srcY, maskX, maskY,dstX, dstY, w, h);*/ + + srcXend = srcX + w; + srcYend = srcY + h; + maskXend = maskX + w; + maskYend = maskY + h; + if (is_transform[0]) { + v.vector[0] = IntToxFixed(srcX); + v.vector[1] = IntToxFixed(srcY); + v.vector[2] = xFixed1; + PictureTransformPoint(transform[0], &v); + srcX = xFixedToInt(v.vector[0]); + srcY = xFixedToInt(v.vector[1]); + v.vector[0] = IntToxFixed(srcXend); + v.vector[1] = IntToxFixed(srcYend); + v.vector[2] = xFixed1; + PictureTransformPoint(transform[0], &v); + srcXend = xFixedToInt(v.vector[0]); + srcYend = xFixedToInt(v.vector[1]); + } + if (is_transform[1]) { + v.vector[0] = IntToxFixed(maskX); + v.vector[1] = IntToxFixed(maskY); + v.vector[2] = xFixed1; + PictureTransformPoint(transform[1], &v); + 
maskX = xFixedToInt(v.vector[0]); + maskY = xFixedToInt(v.vector[1]); + v.vector[0] = IntToxFixed(maskXend); + v.vector[1] = IntToxFixed(maskYend); + v.vector[2] = xFixed1; + PictureTransformPoint(transform[1], &v); + maskXend = xFixedToInt(v.vector[0]); + maskYend = xFixedToInt(v.vector[1]); + } + +#ifdef ACCEL_CP + if (info->ChipFamily < CHIP_FAMILY_R200) { + BEGIN_RING(4 * VTX_DWORD_COUNT + 3); + OUT_RING(CP_PACKET3(RADEON_CP_PACKET3_3D_DRAW_IMMD, + 4 * VTX_DWORD_COUNT + 1)); + OUT_RING(RADEON_CP_VC_FRMT_XY | + RADEON_CP_VC_FRMT_ST0 | + RADEON_CP_VC_FRMT_ST1); + OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN | + RADEON_CP_VC_CNTL_PRIM_WALK_RING | + RADEON_CP_VC_CNTL_MAOS_ENABLE | + RADEON_CP_VC_CNTL_VTX_FMT_RADEON_MODE | + (4 << RADEON_CP_VC_CNTL_NUM_SHIFT)); + } else { + BEGIN_RING(4 * VTX_DWORD_COUNT + 2); + OUT_RING(CP_PACKET3(R200_CP_PACKET3_3D_DRAW_IMMD_2, + 4 * VTX_DWORD_COUNT)); + OUT_RING(RADEON_CP_VC_CNTL_PRIM_TYPE_TRI_FAN | + RADEON_CP_VC_CNTL_PRIM_WALK_RING | + (4 << RADEON_CP_VC_CNTL_NUM_SHIFT)); + } + + VTX_OUT(dstX, dstY, srcX, srcY, maskX, maskY); + VTX_OUT(dstX, dstY + h, srcX, srcYend, maskX, maskYend); + VTX_OUT(dstX + w, dstY + h, srcXend, srcYend, maskXend, maskYend); + VTX_OUT(dstX + w, dstY, srcXend, srcY, maskXend, maskY); + ADVANCE_RING(); +#else /* ACCEL_CP */ + BEGIN_ACCEL(1 + VTX_REG_COUNT * 4); + if (info->ChipFamily < CHIP_FAMILY_R200) { + OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_TRIANGLE_FAN | + RADEON_VF_PRIM_WALK_DATA | + RADEON_VF_RADEON_MODE | + 4 << RADEON_VF_NUM_VERTICES_SHIFT)); + } else { + OUT_ACCEL_REG(RADEON_SE_VF_CNTL, (RADEON_VF_PRIM_TYPE_QUAD_LIST | + RADEON_VF_PRIM_WALK_DATA | + 4 << RADEON_VF_NUM_VERTICES_SHIFT)); + } + + VTX_OUT(dstX, dstY, srcX, srcY, maskX, maskY); + VTX_OUT(dstX, dstY + h, srcX, srcYend, maskX, maskYend); + VTX_OUT(dstX + w, dstY + h, srcXend, srcYend, maskXend, maskYend); + VTX_OUT(dstX + w, dstY, srcXend, srcY, maskXend, maskY); + FINISH_ACCEL(); +#endif /* !ACCEL_CP */ + + 
LEAVE_DRAW(0); +} +#undef VTX_OUT + +#ifdef ONLY_ONCE +static void RadeonDoneComposite(PixmapPtr pDst) +{ + ENTER_DRAW(0); + LEAVE_DRAW(0); +} +#endif /* ONLY_ONCE */ + +#undef ONLY_ONCE diff --git a/src/radeon_mergedfb.c b/src/radeon_mergedfb.c index c04577d1..ba0b343c 100644 --- a/src/radeon_mergedfb.c +++ b/src/radeon_mergedfb.c @@ -1961,7 +1961,14 @@ RADEONSetCursorPositionMerged(ScrnInfoPtr pScrn, int x, int y) OUTREG(RADEON_CUR_HORZ_VERT_POSN, (RADEON_CUR_LOCK | ((xorigin ? 0 : x1) << 16) | (yorigin ? 0 : y1))); - OUTREG(RADEON_CUR_OFFSET, info->cursor_start + yorigin * stride); +#ifdef USE_EXA + if (info->useEXA) + OUTREG(RADEON_CUR_OFFSET, info->cursorArea->offset + yorigin * stride); +#endif /* USE_EXA */ +#ifdef USE_XAA + if (!info->useEXA) + OUTREG(RADEON_CUR_OFFSET, info->cursor_offset + yorigin * stride); +#endif /* USE_XAA */ /* cursor2 */ OUTREG(RADEON_CUR2_HORZ_VERT_OFF, (RADEON_CUR2_LOCK | (xorigin << 16) @@ -1969,9 +1976,14 @@ RADEONSetCursorPositionMerged(ScrnInfoPtr pScrn, int x, int y) OUTREG(RADEON_CUR2_HORZ_VERT_POSN, (RADEON_CUR2_LOCK | ((xorigin ? 0 : x2) << 16) | (yorigin ? 
0 : y2))); - OUTREG(RADEON_CUR2_OFFSET, - info->cursor_start + yorigin * stride); - +#ifdef USE_EXA + if (info->useEXA) + OUTREG(RADEON_CUR2_OFFSET, info->cursorArea->offset + yorigin * stride); +#endif /* USE_EXA */ +#ifdef USE_XAA + if (!info->useEXA) + OUTREG(RADEON_CUR2_OFFSET, info->cursor_offset + yorigin * stride); +#endif /* USE_XAA */ } /* radeon Xv helpers */ diff --git a/src/radeon_reg.h b/src/radeon_reg.h index 92418e67..13f5ad0d 100644 --- a/src/radeon_reg.h +++ b/src/radeon_reg.h @@ -53,6 +53,22 @@ #ifndef _RADEON_REG_H_ #define _RADEON_REG_H_ +#define ATI_DATATYPE_VQ 0 +#define ATI_DATATYPE_CI4 1 +#define ATI_DATATYPE_CI8 2 +#define ATI_DATATYPE_ARGB1555 3 +#define ATI_DATATYPE_RGB565 4 +#define ATI_DATATYPE_RGB888 5 +#define ATI_DATATYPE_ARGB8888 6 +#define ATI_DATATYPE_RGB332 7 +#define ATI_DATATYPE_Y8 8 +#define ATI_DATATYPE_RGB8 9 +#define ATI_DATATYPE_CI16 10 +#define ATI_DATATYPE_VYUY_422 11 +#define ATI_DATATYPE_YVYU_422 12 +#define ATI_DATATYPE_AYUV_444 14 +#define ATI_DATATYPE_ARGB4444 15 + /* Registers for 2D/Video/Overlay */ #define RADEON_ADAPTER_ID 0x0f2c /* PCI */ #define RADEON_AGP_BASE 0x0170 @@ -1226,6 +1242,28 @@ # define RADEON_RB2D_DC_FLUSH_ALL 0xf # define RADEON_RB2D_DC_BUSY (1 << 31) #define RADEON_RB2D_DSTCACHE_MODE 0x3428 + +#define RADEON_RB3D_DSTCACHE_MODE 0x3258 +# define RADEON_RB3D_DC_CACHE_ENABLE (0) +# define RADEON_RB3D_DC_2D_CACHE_DISABLE (1) +# define RADEON_RB3D_DC_3D_CACHE_DISABLE (2) +# define RADEON_RB3D_DC_CACHE_DISABLE (3) +# define RADEON_RB3D_DC_2D_CACHE_LINESIZE_128 (1 << 2) +# define RADEON_RB3D_DC_3D_CACHE_LINESIZE_128 (2 << 2) +# define RADEON_RB3D_DC_2D_CACHE_AUTOFLUSH (1 << 8) +# define RADEON_RB3D_DC_3D_CACHE_AUTOFLUSH (2 << 8) +# define R200_RB3D_DC_2D_CACHE_AUTOFREE (1 << 10) +# define R200_RB3D_DC_3D_CACHE_AUTOFREE (2 << 10) +# define RADEON_RB3D_DC_FORCE_RMW (1 << 16) +# define RADEON_RB3D_DC_DISABLE_RI_FILL (1 << 24) +# define RADEON_RB3D_DC_DISABLE_RI_READ (1 << 25) + +#define 
RADEON_RB3D_DSTCACHE_CTLSTAT 0x325C +# define RADEON_RB3D_DC_FLUSH (3 << 0) +# define RADEON_RB3D_DC_FREE (3 << 2) +# define RADEON_RB3D_DC_FLUSH_ALL 0xf +# define RADEON_RB3D_DC_BUSY (1 << 31) + #define RADEON_REG_BASE 0x0f18 /* PCI */ #define RADEON_REGPROG_INF 0x0f09 /* PCI */ #define RADEON_REVISION_ID 0x0f08 /* PCI */ @@ -2305,6 +2343,11 @@ # define R200_VC_16BIT_SWAP (1 << 0) # define R200_VC_32BIT_SWAP (2 << 0) #define R200_PP_TXFILTER_0 0x2c00 +#define R200_PP_TXFILTER_1 0x2c20 +#define R200_PP_TXFILTER_2 0x2c40 +#define R200_PP_TXFILTER_3 0x2c60 +#define R200_PP_TXFILTER_4 0x2c80 +#define R200_PP_TXFILTER_5 0x2ca0 # define R200_MAG_FILTER_NEAREST (0 << 0) # define R200_MAG_FILTER_LINEAR (1 << 0) # define R200_MAG_FILTER_MASK (1 << 0) @@ -2355,6 +2398,11 @@ # define R200_BORDER_MODE_OGL (0 << 31) # define R200_BORDER_MODE_D3D (1 << 31) #define R200_PP_TXFORMAT_0 0x2c04 +#define R200_PP_TXFORMAT_1 0x2c24 +#define R200_PP_TXFORMAT_2 0x2c44 +#define R200_PP_TXFORMAT_3 0x2c64 +#define R200_PP_TXFORMAT_4 0x2c84 +#define R200_PP_TXFORMAT_5 0x2ca4 # define R200_TXFORMAT_I8 (0 << 0) # define R200_TXFORMAT_AI88 (1 << 0) # define R200_TXFORMAT_RGB332 (2 << 0) @@ -2394,15 +2442,42 @@ # define R200_TXFORMAT_CHROMA_KEY_ENABLE (1 << 29) # define R200_TXFORMAT_CUBIC_MAP_ENABLE (1 << 30) #define R200_PP_TXFORMAT_X_0 0x2c08 +#define R200_PP_TXFORMAT_X_1 0x2c28 +#define R200_PP_TXFORMAT_X_2 0x2c48 +#define R200_PP_TXFORMAT_X_3 0x2c68 +#define R200_PP_TXFORMAT_X_4 0x2c88 +#define R200_PP_TXFORMAT_X_5 0x2ca8 + #define R200_PP_TXSIZE_0 0x2c0c /* NPOT only */ +#define R200_PP_TXSIZE_1 0x2c2c /* NPOT only */ +#define R200_PP_TXSIZE_2 0x2c4c /* NPOT only */ +#define R200_PP_TXSIZE_3 0x2c6c /* NPOT only */ +#define R200_PP_TXSIZE_4 0x2c8c /* NPOT only */ +#define R200_PP_TXSIZE_5 0x2cac /* NPOT only */ + #define R200_PP_TXPITCH_0 0x2c10 /* NPOT only */ +#define R200_PP_TXPITCH_1 0x2c30 /* NPOT only */ +#define R200_PP_TXPITCH_2 0x2c50 /* NPOT only */ +#define R200_PP_TXPITCH_3 
0x2c70 /* NPOT only */ +#define R200_PP_TXPITCH_4 0x2c90 /* NPOT only */ +#define R200_PP_TXPITCH_5 0x2cb0 /* NPOT only */ + #define R200_PP_TXOFFSET_0 0x2d00 # define R200_TXO_ENDIAN_NO_SWAP (0 << 0) # define R200_TXO_ENDIAN_BYTE_SWAP (1 << 0) # define R200_TXO_ENDIAN_WORD_SWAP (2 << 0) # define R200_TXO_ENDIAN_HALFDW_SWAP (3 << 0) +# define R200_TXO_MACRO_LINEAR (0 << 2) +# define R200_TXO_MACRO_TILE (1 << 2) +# define R200_TXO_MICRO_LINEAR (0 << 3) +# define R200_TXO_MICRO_TILE (1 << 3) # define R200_TXO_OFFSET_MASK 0xffffffe0 # define R200_TXO_OFFSET_SHIFT 5 +#define R200_PP_TXOFFSET_1 0x2d18 +#define R200_PP_TXOFFSET_2 0x2d30 +#define R200_PP_TXOFFSET_3 0x2d48 +#define R200_PP_TXOFFSET_4 0x2d60 +#define R200_PP_TXOFFSET_5 0x2d78 #define R200_PP_TFACTOR_0 0x2ee0 #define R200_PP_TFACTOR_1 0x2ee4 diff --git a/src/radeon_render.c b/src/radeon_render.c index 32e6a3e5..191fb693 100644 --- a/src/radeon_render.c +++ b/src/radeon_render.c @@ -1,7 +1,38 @@ +/* + * Copyright 2004 Eric Anholt + * All Rights Reserved. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice (including the next + * paragraph) shall be included in all copies or substantial portions of the + * Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL + * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + * + * Authors: + * Eric Anholt <anholt@FreeBSD.org> + * Hui Yu <hyu@ati.com> + * + */ + #ifdef HAVE_CONFIG_H #include "config.h" #endif +#ifdef USE_XAA + #include "dixstruct.h" #include "xaa.h" @@ -10,11 +41,6 @@ #ifndef RENDER_GENERIC_HELPER #define RENDER_GENERIC_HELPER -static void RadeonInit3DEngineMMIO(ScrnInfoPtr pScrn); -#ifdef XF86DRI -static void RadeonInit3DEngineCP(ScrnInfoPtr pScrn); -#endif - struct blendinfo { Bool dst_alpha; Bool src_alpha; @@ -228,24 +254,6 @@ ATILog2(int val) return bits - 1; } -static void RadeonInit3DEngine(ScrnInfoPtr pScrn) -{ - RADEONInfoPtr info = RADEONPTR (pScrn); - -#ifdef XF86DRI - if (info->directRenderingEnabled) { - RADEONSAREAPrivPtr pSAREAPriv; - - pSAREAPriv = DRIGetSAREAPrivate(pScrn->pScreen); - pSAREAPriv->ctxOwner = DRIGetContext(pScrn->pScreen); - RadeonInit3DEngineCP(pScrn); - } else -#endif - RadeonInit3DEngineMMIO(pScrn); - - info->RenderInited3D = TRUE; -} - static void RemoveLinear (FBLinearPtr linear) { @@ -357,61 +365,6 @@ static void RADEONRestoreByteswap(RADEONInfoPtr info) #endif #endif - -static void FUNC_NAME(RadeonInit3DEngine)(ScrnInfoPtr pScrn) -{ - RADEONInfoPtr info = RADEONPTR(pScrn); - ACCEL_PREAMBLE(); - - if (info->ChipFamily >= CHIP_FAMILY_R300) { - /* Unimplemented */ - } else if ((info->ChipFamily == CHIP_FAMILY_RV250) || - (info->ChipFamily == CHIP_FAMILY_RV280) || - (info->ChipFamily == CHIP_FAMILY_RS300) || - (info->ChipFamily == CHIP_FAMILY_R200)) { - - BEGIN_ACCEL(7); - if (info->ChipFamily == CHIP_FAMILY_RS300) { - OUT_ACCEL_REG(R200_SE_VAP_CNTL_STATUS, RADEON_TCL_BYPASS); - } else { - OUT_ACCEL_REG(R200_SE_VAP_CNTL_STATUS, 0); - } - OUT_ACCEL_REG(R200_PP_CNTL_X, 0); - OUT_ACCEL_REG(R200_PP_TXMULTI_CTL_0, 0); 
- OUT_ACCEL_REG(R200_SE_VTX_STATE_CNTL, 0); - OUT_ACCEL_REG(R200_RE_CNTL, 0x0); - /* XXX: correct? Want it to be like RADEON_VTX_ST?_NONPARAMETRIC */ - OUT_ACCEL_REG(R200_SE_VTE_CNTL, R200_VTX_ST_DENORMALIZED); - OUT_ACCEL_REG(R200_SE_VAP_CNTL, R200_VAP_FORCE_W_TO_ONE | - R200_VAP_VF_MAX_VTX_NUM); - FINISH_ACCEL(); - } else { - BEGIN_ACCEL(2); - if ((info->ChipFamily == CHIP_FAMILY_RADEON) || - (info->ChipFamily == CHIP_FAMILY_RV200)) - OUT_ACCEL_REG(RADEON_SE_CNTL_STATUS, 0); - else - OUT_ACCEL_REG(RADEON_SE_CNTL_STATUS, RADEON_TCL_BYPASS); - OUT_ACCEL_REG(RADEON_SE_COORD_FMT, - RADEON_VTX_XY_PRE_MULT_1_OVER_W0 | - RADEON_VTX_ST0_NONPARAMETRIC | - RADEON_VTX_ST1_NONPARAMETRIC | - RADEON_TEX1_W_ROUTING_USE_W0); - FINISH_ACCEL(); - } - - BEGIN_ACCEL(3); - OUT_ACCEL_REG(RADEON_RE_TOP_LEFT, 0); - OUT_ACCEL_REG(RADEON_RE_WIDTH_HEIGHT, 0x07ff07ff); - OUT_ACCEL_REG(RADEON_SE_CNTL, RADEON_DIFFUSE_SHADE_GOURAUD | - RADEON_BFACE_SOLID | - RADEON_FFACE_SOLID | - RADEON_VTX_PIX_CENTER_OGL | - RADEON_ROUND_MODE_ROUND | - RADEON_ROUND_PREC_4TH_PIX); - FINISH_ACCEL(); -} - static Bool FUNC_NAME(R100SetupTexture)( ScrnInfoPtr pScrn, CARD32 format, @@ -474,9 +427,10 @@ static Bool FUNC_NAME(R100SetupTexture)( while ( height ) { tmp_dst = RADEONHostDataBlit( pScrn, tex_bytepp, width, - dst_pitch, &buf_pitch, - &dst, &height, &hpass ); - RADEONHostDataBlitCopyPass( tmp_dst, src, hpass, buf_pitch, src_pitch ); + dst_pitch, &buf_pitch, + &dst, &height, &hpass); + RADEONHostDataBlitCopyPass( pScrn, tex_bytepp, tmp_dst, src, + hpass, buf_pitch, src_pitch ); src += hpass * src_pitch; } @@ -542,8 +496,8 @@ FUNC_NAME(R100SetupForCPUToScreenAlphaTexture) ( if (blend_cntl == 0) return FALSE; - if (!info->RenderInited3D) - RadeonInit3DEngine(pScrn); + if (!info->XInited3D) + RADEONInit3DEngine(pScrn); if (!FUNC_NAME(R100SetupTexture)(pScrn, maskFormat, alphaPtr, alphaPitch, width, height, flags)) @@ -593,8 +547,8 @@ FUNC_NAME(R100SetupForCPUToScreenTexture) ( if (blend_cntl == 0) return 
FALSE; - if (!info->RenderInited3D) - RadeonInit3DEngine(pScrn); + if (!info->XInited3D) + RADEONInit3DEngine(pScrn); if (!FUNC_NAME(R100SetupTexture)(pScrn, srcFormat, texPtr, texPitch, width, height, flags)) @@ -805,9 +759,10 @@ static Bool FUNC_NAME(R200SetupTexture)( while ( height ) { tmp_dst = RADEONHostDataBlit( pScrn, tex_bytepp, width, - dst_pitch, &buf_pitch, - &dst, &height, &hpass ); - RADEONHostDataBlitCopyPass( tmp_dst, src, hpass, buf_pitch, src_pitch ); + dst_pitch, &buf_pitch, + &dst, &height, &hpass ); + RADEONHostDataBlitCopyPass( pScrn, tex_bytepp, tmp_dst, src, + hpass, buf_pitch, src_pitch ); src += hpass * src_pitch; } @@ -873,8 +828,8 @@ FUNC_NAME(R200SetupForCPUToScreenAlphaTexture) ( if (blend_cntl == 0) return FALSE; - if (!info->RenderInited3D) - RadeonInit3DEngine(pScrn); + if (!info->XInited3D) + RADEONInit3DEngine(pScrn); if (!FUNC_NAME(R200SetupTexture)(pScrn, maskFormat, alphaPtr, alphaPitch, width, height, flags)) @@ -925,8 +880,8 @@ FUNC_NAME(R200SetupForCPUToScreenTexture) ( if (blend_cntl == 0) return FALSE; - if (!info->RenderInited3D) - RadeonInit3DEngine(pScrn); + if (!info->XInited3D) + RADEONInit3DEngine(pScrn); if (!FUNC_NAME(R200SetupTexture)(pScrn, srcFormat, texPtr, texPitch, width, height, flags)) @@ -1074,4 +1029,4 @@ FUNC_NAME(R200SubsequentCPUToScreenTexture) ( } #undef FUNC_NAME - +#endif /* USE_XAA */ diff --git a/src/radeon_video.c b/src/radeon_video.c index 2c2dffd9..6a27616b 100644 --- a/src/radeon_video.c +++ b/src/radeon_video.c @@ -14,6 +14,7 @@ #include "xf86.h" #include "dixstruct.h" #include "xf86PciInfo.h" +#include "xf86fbman.h" #include <X11/extensions/Xv.h> #include "fourcc.h" @@ -24,6 +25,13 @@ #include "msp3430.h" #include "tda9885.h" +#ifdef USE_EXA +/* FIXME : the video code hasn't been ported so this is a hack to make + * it compile at all without too much ifdefing */ +#include "xaa.h" +#include "xf86fbman.h" +#endif + #define OFF_DELAY 250 /* milliseconds */ #define FREE_DELAY 15000 @@ -80,6 
+88,7 @@ static int RADEONPutImage(ScrnInfoPtr, short, short, short, short, short, short, Bool, RegionPtr, pointer); static int RADEONQueryImageAttributes(ScrnInfoPtr, int, unsigned short *, unsigned short *, int *, int *); +static void RADEONFreeMemory(ScrnInfoPtr pScrn, void *mem_struct); static void RADEONVideoTimerCallback(ScrnInfoPtr pScrn, Time now); static int RADEONPutVideo(ScrnInfoPtr pScrn, short src_x, short src_y, short drw_x, short drw_y, @@ -114,6 +123,19 @@ static Atom xvOvAlpha, xvGrAlpha, xvAlphaMode; (RADEONPortPrivPtr)((RADEONPTR(pScrn))->adaptor->pPortPrivates[0].ptr) +#ifdef USE_EXA +static void +ATIVideoSave(ScreenPtr pScreen, ExaOffscreenArea *area) +{ + ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; + RADEONInfoPtr info = RADEONPTR(pScrn); + RADEONPortPrivPtr pPriv = info->adaptor->pPortPrivates[0].ptr; + + if (pPriv->video_memory == area) + pPriv->video_memory = NULL; +} +#endif /* USE_EXA */ + void RADEONInitVideo(ScreenPtr pScreen) { ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum]; @@ -1038,7 +1060,8 @@ RADEONResetVideo(ScrnInfoPtr pScrn) /* this function is called from ScreenInit. pScreen is used by XAA internally, but not valid until ScreenInit finishs. */ - if (info->accelOn && pScrn->pScreen) info->accel->Sync(pScrn); + if (info->accelOn && pScrn->pScreen) + RADEON_SYNC(info, pScrn); /* this is done here because each time the server is reset these could change.. 
Otherwise they remain constant */ @@ -1204,20 +1227,20 @@ static void RADEONSetupTheatre(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) } else { t->wComp0Connector=RT_COMP1; } - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Composite connector is port %d\n", t->wComp0Connector); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Composite connector is port %ld\n", t->wComp0Connector); break; case 3: if(a & 0x4){ t->wSVideo0Connector=RT_YCR_COMP4; } else { t->wSVideo0Connector=RT_YCF_COMP4; } - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "SVideo connector is port %d\n", t->wSVideo0Connector); + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "SVideo connector is port %ld\n", t->wSVideo0Connector); break; default: break; } } - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Rage Theatre: Connectors (detected): tuner=%d, composite=%d, svideo=%d\n", + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "Rage Theatre: Connectors (detected): tuner=%ld, composite=%ld, svideo=%ld\n", t->wTunerConnector, t->wComp0Connector, t->wSVideo0Connector); } @@ -1226,7 +1249,7 @@ static void RADEONSetupTheatre(ScrnInfoPtr pScrn, RADEONPortPrivPtr pPriv) if(info->RageTheatreCompositePort>=0)t->wComp0Connector=info->RageTheatreCompositePort; if(info->RageTheatreSVideoPort>=0)t->wSVideo0Connector=info->RageTheatreSVideoPort; - xf86DrvMsg(pScrn->scrnIndex, X_INFO, "RageTheatre: Connectors (using): tuner=%d, composite=%d, svideo=%d\n", + xf86DrvMsg(pScrn->scrnIndex, X_INFO, "RageTheatre: Connectors (using): tuner=%ld, composite=%ld, svideo=%ld\n", t->wTunerConnector, t->wComp0Connector, t->wSVideo0Connector); switch((info->RageTheatreCrystal>=0)?info->RageTheatreCrystal:pll->reference_freq){ @@ -1253,7 +1276,6 @@ RADEONAllocAdaptor(ScrnInfoPtr pScrn) XF86VideoAdaptorPtr adapt; RADEONInfoPtr info = RADEONPTR(pScrn); RADEONPortPrivPtr pPriv; - unsigned char *RADEONMMIO = info->MMIO; CARD32 dot_clock; if(!(adapt = xf86XVAllocateVideoAdaptorRec(pScrn))) @@ -1531,9 +1553,9 @@ RADEONStopVideo(ScrnInfoPtr pScrn, pointer data, Bool cleanup) if(pPriv->uda1380 
!= NULL) xf86_uda1380_mute(pPriv->uda1380, TRUE); if(pPriv->i2c != NULL) RADEON_board_setmisc(pPriv); } - if(info->videoLinear) { - xf86FreeOffscreenLinear(info->videoLinear); - info->videoLinear = NULL; + if (pPriv->video_memory != NULL) { + RADEONFreeMemory(pScrn, pPriv->video_memory); + pPriv->video_memory = NULL; } pPriv->videoStatus = 0; } else { @@ -1556,7 +1578,7 @@ RADEONSetPortAttribute(ScrnInfoPtr pScrn, Bool setAlpha = FALSE; unsigned char *RADEONMMIO = info->MMIO; - info->accel->Sync(pScrn); + RADEON_SYNC(info, pScrn); #define RTFSaturation(a) (1.0 + ((a)*1.0)/1000.0) #define RTFBrightness(a) (((a)*1.0)/2000.0) @@ -1785,7 +1807,7 @@ RADEONSetPortAttribute(ScrnInfoPtr pScrn, else if(attribute == xvAdjustment) { pPriv->adjustment=value; - xf86DrvMsg(pScrn->scrnIndex,X_ERROR,"Setting pPriv->adjustment to %d\n", pPriv->adjustment); + xf86DrvMsg(pScrn->scrnIndex,X_ERROR,"Setting pPriv->adjustment to %ld\n", pPriv->adjustment); if(pPriv->tda9885!=0){ pPriv->tda9885->top_adjustment=value; RADEON_TDA9885_SetEncoding(pPriv); @@ -1825,7 +1847,7 @@ RADEONGetPortAttribute(ScrnInfoPtr pScrn, RADEONInfoPtr info = RADEONPTR(pScrn); RADEONPortPrivPtr pPriv = (RADEONPortPrivPtr)data; - if (info->accelOn) info->accel->Sync(pScrn); + if (info->accelOn) RADEON_SYNC(info, pScrn); if(attribute == xvAutopaintColorkey) *value = pPriv->autopaint_colorkey; @@ -2008,11 +2030,11 @@ RADEONCopyData( ScrnInfoPtr pScrn, unsigned char *src, unsigned char *dst, - int srcPitch, - int dstPitch, - int h, - int w, - int bpp + unsigned int srcPitch, + unsigned int dstPitch, + unsigned int h, + unsigned int w, + unsigned int bpp ){ RADEONInfoPtr info = RADEONPTR(pScrn); #ifdef XF86DRI @@ -2033,7 +2055,8 @@ RADEONCopyData( while ( buf = RADEONHostDataBlit( pScrn, bpp, w, dstPitch, &bufPitch, &dst, &h, &hpass ) ) { - RADEONHostDataBlitCopyPass( buf, src, hpass, bufPitch, srcPitch ); + RADEONHostDataBlitCopyPass( pScrn, bpp, buf, src, hpass, bufPitch, + srcPitch ); src += hpass * srcPitch; } @@ 
-2046,24 +2069,25 @@ RADEONCopyData( { #if X_BYTE_ORDER == X_BIG_ENDIAN unsigned char *RADEONMMIO = info->MMIO; - if ( bpp == 2 ) - { - OUTREG(RADEON_SURFACE_CNTL, info->ModeReg.surface_cntl - & ~(RADEON_NONSURF_AP0_SWP_32BPP - | RADEON_NONSURF_AP0_SWP_16BPP)); - } - else /* bpp == 4 */ - { - OUTREG(RADEON_SURFACE_CNTL, (info->ModeReg.surface_cntl - | RADEON_NONSURF_AP0_SWP_32BPP) - & ~RADEON_NONSURF_AP0_SWP_16BPP); + unsigned int swapper = info->ModeReg.surface_cntl & + ~(RADEON_NONSURF_AP0_SWP_32BPP | RADEON_NONSURF_AP1_SWP_32BPP | + RADEON_NONSURF_AP0_SWP_16BPP | RADEON_NONSURF_AP1_SWP_16BPP); + + switch(bpp) { + case 2: + swapper |= RADEON_NONSURF_AP0_SWP_16BPP + | RADEON_NONSURF_AP1_SWP_16BPP; + break; + case 4: + swapper |= RADEON_NONSURF_AP0_SWP_32BPP + | RADEON_NONSURF_AP1_SWP_32BPP; + break; } + OUTREG(RADEON_SURFACE_CNTL, swapper); #endif + w *= bpp; - w *= 2; - - while (h--) - { + while (h--) { memcpy(dst, src, w); src += srcPitch; dst += dstPitch; @@ -2097,13 +2121,13 @@ RADEONCopyRGB24Data( ScrnInfoPtr pScrn, unsigned char *src, unsigned char *dst, - int srcPitch, - int dstPitch, - int h, - int w + unsigned int srcPitch, + unsigned int dstPitch, + unsigned int h, + unsigned int w ){ CARD32 *dptr; - CARD8 *sptr; + CARD8 *sptr = 0; int i,j; RADEONInfoPtr info = RADEONPTR(pScrn); #ifdef XF86DRI @@ -2113,6 +2137,8 @@ RADEONCopyRGB24Data( CARD32 bufPitch; unsigned int hpass; + /* XXX Fix endian flip on R300 */ + while ( dptr = ( CARD32* )RADEONHostDataBlit( pScrn, 4, w, dstPitch, &bufPitch, &dst, &h, &hpass ) ) @@ -2171,11 +2197,11 @@ RADEONCopyMungedData( unsigned char *src2, unsigned char *src3, unsigned char *dst1, - int srcPitch, - int srcPitch2, - int dstPitch, - int h, - int w + unsigned int srcPitch, + unsigned int srcPitch2, + unsigned int dstPitch, + unsigned int h, + unsigned int w ){ RADEONInfoPtr info = RADEONPTR(pScrn); #ifdef XF86DRI @@ -2186,6 +2212,8 @@ RADEONCopyMungedData( CARD32 y = 0, bufPitch; unsigned int hpass; + /* XXX Fix endian 
flip on R300 */ + while ( buf = RADEONHostDataBlit( pScrn, 4, w/2, dstPitch, &bufPitch, &dst1, &h, &hpass ) ) { @@ -2249,9 +2277,8 @@ RADEONCopyMungedData( { src2 += srcPitch2; src3 += srcPitch2; - } + } } - #if X_BYTE_ORDER == X_BIG_ENDIAN /* restore byte swapping */ OUTREG(RADEON_SURFACE_CNTL, info->ModeReg.surface_cntl); @@ -2259,46 +2286,111 @@ RADEONCopyMungedData( } } - -static FBLinearPtr +/* Allocates memory, either by resizing the allocation pointed to by mem_struct, + * or by freeing mem_struct (if non-NULL) and allocating a new space. The size + * is measured in bytes, and the offset from the beginning of card space is + * returned. + */ +static CARD32 RADEONAllocateMemory( ScrnInfoPtr pScrn, - FBLinearPtr linear, + void **mem_struct, int size ){ - ScreenPtr pScreen; - FBLinearPtr new_linear; + ScreenPtr pScreen; + RADEONInfoPtr info = RADEONPTR(pScrn); + int offset = 0; - if(linear) { - if(linear->size >= size) - return linear; + pScreen = screenInfo.screens[pScrn->scrnIndex]; +#ifdef USE_EXA + if (info->useEXA) { + ExaOffscreenArea *area = *mem_struct; - if(xf86ResizeOffscreenLinear(linear, size)) - return linear; + if (area != NULL) { + if (area->size >= size) + return area->offset; - xf86FreeOffscreenLinear(linear); - } + exaOffscreenFree(pScrn->pScreen, area); + } - pScreen = screenInfo.screens[pScrn->scrnIndex]; + area = exaOffscreenAlloc(pScrn->pScreen, size, 64, TRUE, ATIVideoSave, + NULL); + *mem_struct = area; + if (area == NULL) + return 0; + offset = area->offset; + } +#endif /* USE_EXA */ +#ifdef USE_XAA + if (!info->useEXA) { + FBLinearPtr linear = *mem_struct; + int cpp = info->CurrentLayout.bitsPerPixel / 8; + + /* XAA allocates in units of pixels at the screen bpp, so adjust size + * appropriately. 
+ */ + size = (size + cpp - 1) / cpp; + + if (linear) { + if(linear->size >= size) + return linear->offset * cpp; + + if(xf86ResizeOffscreenLinear(linear, size)) + return linear->offset * cpp; + + xf86FreeOffscreenLinear(linear); + } - new_linear = xf86AllocateOffscreenLinear(pScreen, size, 16, + linear = xf86AllocateOffscreenLinear(pScreen, size, 16, NULL, NULL, NULL); + *mem_struct = linear; - if(!new_linear) { - int max_size; + if (!linear) { + int max_size; - xf86QueryLargestOffscreenLinear(pScreen, &max_size, 16, - PRIORITY_EXTREME); + xf86QueryLargestOffscreenLinear(pScreen, &max_size, 16, + PRIORITY_EXTREME); - if(max_size < size) - return NULL; + if(max_size < size) + return 0; - xf86PurgeUnlockedOffscreenAreas(pScreen); - new_linear = xf86AllocateOffscreenLinear(pScreen, size, 16, - NULL, NULL, NULL); - } + xf86PurgeUnlockedOffscreenAreas(pScreen); + linear = xf86AllocateOffscreenLinear(pScreen, size, 16, + NULL, NULL, NULL); + *mem_struct = linear; + if (!linear) + return 0; + } + offset = linear->offset * cpp; + } +#endif /* USE_XAA */ - return new_linear; + return offset; +} + +static void +RADEONFreeMemory( + ScrnInfoPtr pScrn, + void *mem_struct +){ + RADEONInfoPtr info = RADEONPTR(pScrn); + +#ifdef USE_EXA + if (info->useEXA) { + ExaOffscreenArea *area = mem_struct; + + if (area != NULL) + exaOffscreenFree(pScrn->pScreen, area); + } +#endif /* USE_EXA */ +#ifdef USE_XAA + if (!info->useEXA) { + FBLinearPtr linear = mem_struct; + + if (linear != NULL) + xf86FreeOffscreenLinear(linear); + } +#endif /* USE_XAA */ } static void @@ -2478,7 +2570,7 @@ RADEONDisplayVideo( RADEONWaitForFifo(pScrn, 2); OUTREG(RADEON_OV0_REG_LOAD_CNTL, 1); - if (info->accelOn) info->accel->Sync(pScrn); + if (info->accelOn) RADEON_SYNC(info, pScrn); while(!(INREG(RADEON_OV0_REG_LOAD_CNTL) & (1 << 3))); dsr=(double)(1<<0xC)/h_inc; @@ -2686,27 +2778,22 @@ RADEONPutImage( case FOURCC_RGB24: dstPitch=(width*4+0x0f)&(~0x0f); srcPitch=width*3; - new_size=(dstPitch*height+bpp-1)/bpp; 
break; case FOURCC_RGBA32: dstPitch=(width*4+0x0f)&(~0x0f); srcPitch=width*4; - new_size=(dstPitch*height+bpp-1)/bpp; break; case FOURCC_RGBT16: dstPitch=(width*2+0x0f)&(~0x0f); srcPitch=(width*2+3)&(~0x03); - new_size=(dstPitch*height+bpp-1)/bpp; break; case FOURCC_RGB16: dstPitch=(width*2+0x0f)&(~0x0f); srcPitch=(width*2+3)&(~0x03); - new_size=(dstPitch*height+bpp-1)/bpp; break; case FOURCC_YV12: case FOURCC_I420: dstPitch = ((width << 1) + 63) & ~63; - new_size = ((dstPitch * height) + bpp - 1) / bpp; srcPitch = (width + 3) & ~3; s2offset = srcPitch * height; srcPitch2 = ((width >> 1) + 3) & ~3; @@ -2716,16 +2803,16 @@ RADEONPutImage( case FOURCC_YUY2: default: dstPitch = ((width << 1) + 63) & ~63; - new_size = ((dstPitch * height) + bpp - 1) / bpp; srcPitch = (width << 1); break; } - if(!(info->videoLinear = RADEONAllocateMemory(pScrn, info->videoLinear, - pPriv->doubleBuffer ? (new_size << 1) : new_size))) - { - return BadAlloc; - } + new_size = dstPitch * height; + pPriv->video_offset = RADEONAllocateMemory(pScrn, &pPriv->video_memory, + (pPriv->doubleBuffer ? 
+ (new_size * 2) : new_size)); + if (pPriv->video_offset == 0) + return BadAlloc; pPriv->currentBuffer ^= 1; @@ -2734,9 +2821,10 @@ RADEONPutImage( left = (xa >> 16) & ~1; npixels = ((((xb + 0xffff) >> 16) + 1) & ~1) - left; - offset = (info->videoLinear->offset * bpp) + (top * dstPitch); + offset = (pPriv->video_offset) + (top * dstPitch); + if(pPriv->doubleBuffer) - offset += pPriv->currentBuffer * new_size * bpp; + offset += pPriv->currentBuffer * new_size; dst_start = info->FB + offset; @@ -2861,9 +2949,9 @@ RADEONVideoTimerCallback(ScrnInfoPtr pScrn, Time now) } } else { /* FREE_TIMER */ if(pPriv->freeTime < now) { - if(info->videoLinear) { - xf86FreeOffscreenLinear(info->videoLinear); - info->videoLinear = NULL; + if (pPriv->video_memory != NULL) { + RADEONFreeMemory(pScrn, pPriv->video_memory); + pPriv->video_memory = NULL; } pPriv->videoStatus = 0; info->VideoTimerCallback = NULL; @@ -2875,7 +2963,7 @@ RADEONVideoTimerCallback(ScrnInfoPtr pScrn, Time now) /****************** Offscreen stuff ***************/ typedef struct { - FBLinearPtr linear; + void *surface_memory; Bool isOn; } OffscreenPrivRec, * OffscreenPrivPtr; @@ -2887,46 +2975,46 @@ RADEONAllocateSurface( unsigned short h, XF86SurfacePtr surface ){ - FBLinearPtr linear; - int pitch, size, bpp; + int offset, pitch, size; OffscreenPrivPtr pPriv; + void *surface_memory = NULL; if((w > 1024) || (h > 1024)) return BadAlloc; w = (w + 1) & ~1; pitch = ((w << 1) + 15) & ~15; - bpp = pScrn->bitsPerPixel >> 3; - size = ((pitch * h) + bpp - 1) / bpp; + size = pitch * h; - if(!(linear = RADEONAllocateMemory(pScrn, NULL, size))) + offset = RADEONAllocateMemory(pScrn, &surface_memory, size); + if (offset == 0) return BadAlloc; surface->width = w; surface->height = h; if(!(surface->pitches = xalloc(sizeof(int)))) { - xf86FreeOffscreenLinear(linear); + RADEONFreeMemory(pScrn, surface_memory); return BadAlloc; } if(!(surface->offsets = xalloc(sizeof(int)))) { xfree(surface->pitches); - 
xf86FreeOffscreenLinear(linear); + RADEONFreeMemory(pScrn, surface_memory); return BadAlloc; } if(!(pPriv = xalloc(sizeof(OffscreenPrivRec)))) { xfree(surface->pitches); xfree(surface->offsets); - xf86FreeOffscreenLinear(linear); + RADEONFreeMemory(pScrn, surface_memory); return BadAlloc; } - pPriv->linear = linear; + pPriv->surface_memory = surface_memory; pPriv->isOn = FALSE; surface->pScrn = pScrn; surface->id = id; surface->pitches[0] = pitch; - surface->offsets[0] = linear->offset * bpp; + surface->offsets[0] = offset; surface->devPrivate.ptr = (pointer)pPriv; return Success; @@ -2952,11 +3040,12 @@ static int RADEONFreeSurface( XF86SurfacePtr surface ){ + ScrnInfoPtr pScrn = surface->pScrn; OffscreenPrivPtr pPriv = (OffscreenPrivPtr)surface->devPrivate.ptr; if(pPriv->isOn) RADEONStopSurface(surface); - xf86FreeOffscreenLinear(pPriv->linear); + RADEONFreeMemory(pScrn, pPriv->surface_memory); xfree(surface->pitches); xfree(surface->offsets); xfree(surface->devPrivate.ptr); @@ -3107,7 +3196,9 @@ RADEONPutVideo( RADEONPortPrivPtr pPriv = (RADEONPortPrivPtr)data; unsigned char *RADEONMMIO = info->MMIO; INT32 xa, xb, ya, yb, top; - unsigned int pitch, new_size, offset1, offset2, offset3, offset4, s2offset, s3offset, vbi_offset0, vbi_offset1; + unsigned int pitch, new_size, alloc_size; + unsigned int offset1, offset2, offset3, offset4, s2offset, s3offset; + unsigned int vbi_offset0, vbi_offset1; int srcPitch, srcPitch2, dstPitch; int bpp; BoxRec dstBox; @@ -3116,7 +3207,7 @@ RADEONPutVideo( int mult; int vbi_line_width, vbi_start, vbi_end; - info->accel->Sync(pScrn); + RADEON_SYNC(info, pScrn); /* * s2offset, s3offset - byte offsets into U and V plane of the * source where copying starts. 
Y plane is @@ -3200,7 +3291,6 @@ RADEONPutVideo( case FOURCC_I420: top &= ~1; dstPitch = ((width << 1) + 15) & ~15; - new_size = ((dstPitch * height) + bpp - 1) / bpp; srcPitch = (width + 3) & ~3; s2offset = srcPitch * height; srcPitch2 = ((width >> 1) + 3) & ~3; @@ -3210,16 +3300,21 @@ RADEONPutVideo( case FOURCC_YUY2: default: dstPitch = ((width<<1) + 15) & ~15; - new_size = ((dstPitch * height) + bpp - 1) / bpp; srcPitch = (width<<1); break; } + new_size = dstPitch * height; new_size = new_size + 0x1f; /* for aligning */ - if(!(info->videoLinear = RADEONAllocateMemory(pScrn, info->videoLinear, new_size*mult+(pPriv->capture_vbi_data?2*2*vbi_line_width*21:0)))) - { - return BadAlloc; - } + alloc_size = new_size * mult; + if (pPriv->capture_vbi_data) + alloc_size += 2 * 2 * vbi_line_width * 21; + + pPriv->video_offset = RADEONAllocateMemory(pScrn, &pPriv->video_memory, + (pPriv->doubleBuffer ? + (new_size * 2) : new_size)); + if (pPriv->video_offset == 0) + return BadAlloc; /* I have suspicion that capture engine must be active _before_ Rage Theatre is being manipulated with.. 
*/ @@ -3232,20 +3327,20 @@ RADEONPutVideo( switch(pPriv->overlay_deinterlacing_method){ case METHOD_BOB: case METHOD_SINGLE: - offset1 = (info->videoLinear->offset*bpp+0xf) & (~0xf); - offset2 = ((info->videoLinear->offset+new_size)*bpp + 0xf) & (~0xf); + offset1 = (pPriv->video_offset + 0xf) & (~0xf); + offset2 = (pPriv->video_offset + new_size + 0xf) & (~0xf); offset3 = offset1; offset4 = offset2; break; case METHOD_WEAVE: - offset1 = (info->videoLinear->offset*bpp+0xf) & (~0xf); + offset1 = (pPriv->video_offset + 0xf) & (~0xf); offset2 = offset1+dstPitch; - offset3 = ((info->videoLinear->offset+2*new_size)*bpp + 0xf) & (~0xf); + offset3 = (pPriv->video_offset + 2 * new_size + 0xf) & (~0xf); offset4 = offset3+dstPitch; break; default: - offset1 = (info->videoLinear->offset*bpp+0xf) & (~0xf); - offset2 = ((info->videoLinear->offset+new_size)*bpp + 0xf) & (~0xf); + offset1 = (pPriv->video_offset + 0xf) & (~0xf); + offset2 = (pPriv->video_offset + new_size + 0xf) & (~0xf); offset3 = offset1; offset4 = offset2; } @@ -3268,8 +3363,7 @@ RADEONPutVideo( vbi_end = 20; } - - vbi_offset0 = ((info->videoLinear->offset+mult*new_size)*bpp+0xf) & (~0xf); + vbi_offset0 = (pPriv->video_offset + mult * new_size * bpp + 0xf) & (~0xf); vbi_offset1 = vbi_offset0 + dstPitch*20; OUTREG(RADEON_CAP0_VBI0_OFFSET, vbi_offset0+display_base); OUTREG(RADEON_CAP0_VBI1_OFFSET, vbi_offset1+display_base); diff --git a/src/radeon_video.h b/src/radeon_video.h index 5894c8d0..33e78dfa 100644 --- a/src/radeon_video.h +++ b/src/radeon_video.h @@ -81,6 +81,14 @@ typedef struct { Bool autopaint_colorkey; Bool crt2; /* 0=CRT1, 1=CRT2 */ +#ifdef USE_EXA + int size; + ExaOffscreenArea *off_screen; +#endif + + void *video_memory; + int video_offset; + Atom device_id, location_id, instance_id; } RADEONPortPrivRec, *RADEONPortPrivPtr; |