/* * Copyright 2003 through 2004 by Marc Aurele La France (TSI @ UQV), tsi@xfree86.org * * Permission to use, copy, modify, distribute, and sell this software and its * documentation for any purpose is hereby granted without fee, provided that * the above copyright notice appear in all copies and that both that copyright * notice and this permission notice appear in supporting documentation, and * that the name of Marc Aurele La France not be used in advertising or * publicity pertaining to distribution of the software without specific, * written prior permission. Marc Aurele La France makes no representations * about the suitability of this software for any purpose. It is provided * "as-is" without express or implied warranty. * * MARC AURELE LA FRANCE DISCLAIMS ALL WARRANTIES WITH REGARD TO THIS SOFTWARE, * INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS. IN NO * EVENT SHALL MARC AURELE LA FRANCE BE LIABLE FOR ANY SPECIAL, INDIRECT OR * CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS OF USE, * DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER * TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR * PERFORMANCE OF THIS SOFTWARE. */ /* * Copyright 1999-2000 Precision Insight, Inc., Cedar Park, Texas. * All Rights Reserved. * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice (including the next * paragraph) shall be included in all copies or substantial portions of the * Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL * PRECISION INSIGHT AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER * DEALINGS IN THE SOFTWARE. */ /* * DRI support by: * Manuel Teira * Leif Delgass * * EXA support by: * Jakub Stachowski * George Sapountzis */ #ifdef HAVE_CONFIG_H #include "config.h" #endif #include #include "ati.h" #include "atichip.h" #include "atidri.h" #include "atimach64accel.h" #include "atimach64io.h" #include "atipriv.h" #include "atiregs.h" #ifdef XF86DRI_DEVEL #include "mach64_dri.h" #include "mach64_sarea.h" #endif #ifdef USE_EXA extern CARD8 ATIMach64ALU[]; extern void ATIMach64ValidateClip ( ATIPtr pATI, int sc_left, int sc_right, int sc_top, int sc_bottom ); #if 0 #define MACH64_TRACE(x) \ do { \ ErrorF("Mach64(%s): ", __FUNCTION__); \ ErrorF x; \ } while(0) #else #define MACH64_TRACE(x) do { } while(0) #endif #if 0 #define MACH64_FALLBACK(x) \ do { \ ErrorF("Fallback(%s): ", __FUNCTION__); \ ErrorF x; \ return FALSE; \ } while (0) #else #define MACH64_FALLBACK(x) return FALSE #endif static void Mach64WaitMarker(ScreenPtr pScreenInfo, int Marker) { ATIMach64Sync(xf86Screens[pScreenInfo->myNum]); } static Bool Mach64GetDatatypeBpp(PixmapPtr pPix, CARD32 *pix_width) { int bpp = pPix->drawable.bitsPerPixel; switch (bpp) { case 8: *pix_width = SetBits(PIX_WIDTH_8BPP, DP_DST_PIX_WIDTH) | SetBits(PIX_WIDTH_8BPP, DP_SRC_PIX_WIDTH) | SetBits(PIX_WIDTH_1BPP, DP_HOST_PIX_WIDTH); break; case 16: *pix_width = SetBits(PIX_WIDTH_16BPP, DP_DST_PIX_WIDTH) | SetBits(PIX_WIDTH_16BPP, DP_SRC_PIX_WIDTH) | SetBits(PIX_WIDTH_1BPP, DP_HOST_PIX_WIDTH); break; case 24: *pix_width = SetBits(PIX_WIDTH_8BPP, DP_DST_PIX_WIDTH) | SetBits(PIX_WIDTH_8BPP, DP_SRC_PIX_WIDTH) | SetBits(PIX_WIDTH_1BPP, DP_HOST_PIX_WIDTH); break; case 32: *pix_width = SetBits(PIX_WIDTH_32BPP, DP_DST_PIX_WIDTH) | SetBits(PIX_WIDTH_32BPP, DP_SRC_PIX_WIDTH) | SetBits(PIX_WIDTH_1BPP, DP_HOST_PIX_WIDTH); break; default: MACH64_FALLBACK(("Unsupported bpp: %d\n", bpp)); } #if X_BYTE_ORDER == X_LITTLE_ENDIAN *pix_width |= DP_BYTE_PIX_ORDER; #endif /* X_BYTE_ORDER */ return TRUE; } static Bool Mach64GetOffsetPitch(PixmapPtr pPix, int bpp, CARD32 *pitch_offset, unsigned int offset, unsigned int pitch) { #if 0 ScrnInfoPtr pScreenInfo = xf86Screens[pPix->drawable.pScreen->myNum]; ATIPtr pATI = ATIPTR(pScreenInfo); if (pitch % pATI->pExa->pixmapPitchAlign != 0) MACH64_FALLBACK(("Bad pitch 0x%08x\n", pitch)); if (offset % pATI->pExa->pixmapOffsetAlign != 0) MACH64_FALLBACK(("Bad offset 0x%08x\n", offset)); #endif /* pixels / 8 = ((bytes * 8) / bpp) / 8 = bytes / bpp */ pitch = pitch / bpp; /* bytes / 8 */ offset = offset >> 3; *pitch_offset = ((pitch << 22) | (offset << 0)); return TRUE; } static Bool Mach64GetPixmapOffsetPitch(PixmapPtr pPix, CARD32 *pitch_offset) { CARD32 pitch, offset; int bpp; bpp = pPix->drawable.bitsPerPixel; if (bpp == 24) bpp = 8; pitch = exaGetPixmapPitch(pPix); offset = exaGetPixmapOffset(pPix); return Mach64GetOffsetPitch(pPix, bpp, pitch_offset, offset, pitch); } static Bool Mach64PrepareCopy ( PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir, int ydir, int alu, Pixel planemask ) { ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; ATIPtr pATI = ATIPTR(pScreenInfo); CARD32 src_pitch_offset, dst_pitch_offset, dp_pix_width; ATIDRISync(pScreenInfo); if (!Mach64GetDatatypeBpp(pDstPixmap, &dp_pix_width)) return FALSE; if (!Mach64GetPixmapOffsetPitch(pSrcPixmap, &src_pitch_offset)) return FALSE; if (!Mach64GetPixmapOffsetPitch(pDstPixmap, &dst_pitch_offset)) return FALSE; ATIMach64WaitForFIFO(pATI, 7); outf(DP_WRITE_MASK, planemask); outf(DP_PIX_WIDTH, dp_pix_width); outf(SRC_OFF_PITCH, src_pitch_offset); outf(DST_OFF_PITCH, dst_pitch_offset); outf(DP_SRC, DP_MONO_SRC_ALLONES | SetBits(SRC_BLIT, DP_FRGD_SRC) | SetBits(SRC_BKGD, DP_BKGD_SRC)); outf(DP_MIX, SetBits(ATIMach64ALU[alu], DP_FRGD_MIX)); outf(CLR_CMP_CNTL, CLR_CMP_FN_FALSE); pATI->dst_cntl = 0; if (ydir > 0) pATI->dst_cntl |= DST_Y_DIR; if (xdir > 0) pATI->dst_cntl |= DST_X_DIR; if (pATI->XModifier == 1) outf(DST_CNTL, pATI->dst_cntl); else pATI->dst_cntl |= DST_24_ROT_EN; return TRUE; } static void Mach64Copy ( PixmapPtr pDstPixmap, int srcX, int srcY, int dstX, int dstY, int w, int h ) { ScrnInfoPtr pScreenInfo = xf86Screens[pDstPixmap->drawable.pScreen->myNum]; ATIPtr pATI = ATIPTR(pScreenInfo); srcX *= pATI->XModifier; dstY *= pATI->XModifier; w *= pATI->XModifier; ATIDRISync(pScreenInfo); /* Disable clipping if it gets in the way */ ATIMach64ValidateClip(pATI, dstX, dstX + w - 1, dstY, dstY + h - 1); if (!(pATI->dst_cntl & DST_X_DIR)) { srcX += w - 1; dstX += w - 1; } if (!(pATI->dst_cntl & DST_Y_DIR)) { srcY += h - 1; dstY += h - 1; } if (pATI->XModifier != 1) outf(DST_CNTL, pATI->dst_cntl | SetBits((dstX / 4) % 6, DST_24_ROT)); ATIMach64WaitForFIFO(pATI, 4); outf(SRC_Y_X, SetWord(srcX, 1) | SetWord(srcY, 0)); outf(SRC_WIDTH1, w); outf(DST_Y_X, SetWord(dstX, 1) | SetWord(dstY, 0)); outf(DST_HEIGHT_WIDTH, SetWord(w, 1) | SetWord(h, 0)); /* * On VTB's and later, the engine will randomly not wait for a copy * operation to commit its results to video memory before starting the next * one. The probability of such occurrences increases with GUI_WB_FLUSH * (or GUI_WB_FLUSH_P) setting, bitsPerPixel and/or CRTC clock. This * would point to some kind of video memory bandwidth problem were it noti * for the fact that the problem occurs less often (but still occurs) when * copying larger rectangles. */ if ((pATI->Chip >= ATI_CHIP_264VTB) && !pATI->OptionDevel) { exaMarkSync(pScreenInfo->pScreen); /* Force sync. */ exaWaitSync(pScreenInfo->pScreen); /* Sync and notify EXA. */ } } static void Mach64DoneCopy(PixmapPtr pDstPixmap) { } static Bool Mach64PrepareSolid ( PixmapPtr pPixmap, int alu, Pixel planemask, Pixel fg ) { ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum]; ATIPtr pATI = ATIPTR(pScreenInfo); CARD32 dst_pitch_offset, dp_pix_width; ATIDRISync(pScreenInfo); if (!Mach64GetDatatypeBpp(pPixmap, &dp_pix_width)) return FALSE; if (!Mach64GetPixmapOffsetPitch(pPixmap, &dst_pitch_offset)) return FALSE; ATIMach64WaitForFIFO(pATI, 7); outf(DP_WRITE_MASK, planemask); outf(DP_PIX_WIDTH, dp_pix_width); outf(DST_OFF_PITCH, dst_pitch_offset); outf(DP_SRC, DP_MONO_SRC_ALLONES | SetBits(SRC_FRGD, DP_FRGD_SRC) | SetBits(SRC_BKGD, DP_BKGD_SRC)); outf(DP_FRGD_CLR, fg); outf(DP_MIX, SetBits(ATIMach64ALU[alu], DP_FRGD_MIX)); outf(CLR_CMP_CNTL, CLR_CMP_FN_FALSE); if (pATI->XModifier == 1) outf(DST_CNTL, DST_X_DIR | DST_Y_DIR); return TRUE; } static void Mach64Solid ( PixmapPtr pPixmap, int x1, int y1, int x2, int y2 ) { ScrnInfoPtr pScreenInfo = xf86Screens[pPixmap->drawable.pScreen->myNum]; ATIPtr pATI = ATIPTR(pScreenInfo); int x = x1; int y = y1; int w = x2-x1; int h = y2-y1; ATIDRISync(pScreenInfo); if (pATI->XModifier != 1) { x *= pATI->XModifier; w *= pATI->XModifier; outf(DST_CNTL, SetBits((x / 4) % 6, DST_24_ROT) | (DST_X_DIR | DST_Y_DIR | DST_24_ROT_EN)); } /* Disable clipping if it gets in the way */ ATIMach64ValidateClip(pATI, x, x + w - 1, y, y + h - 1); ATIMach64WaitForFIFO(pATI, 2); outf(DST_Y_X, SetWord(x, 1) | SetWord(y, 0)); outf(DST_HEIGHT_WIDTH, SetWord(w, 1) | SetWord(h, 0)); } static void Mach64DoneSolid(PixmapPtr pPixmap) { } /* * Memcpy-based UTS. */ static Bool Mach64UploadToScreen(PixmapPtr pDst, int x, int y, int w, int h, char *src, int src_pitch) { char *dst = pDst->devPrivate.ptr; int dst_pitch = exaGetPixmapPitch(pDst); int bpp = pDst->drawable.bitsPerPixel; int cpp = (bpp + 7) / 8; int wBytes = w * cpp; exaWaitSync(pDst->drawable.pScreen); dst += (x * cpp) + (y * dst_pitch); while (h--) { memcpy(dst, src, wBytes); src += src_pitch; dst += dst_pitch; } return TRUE; } /* * Memcpy-based DFS. */ static Bool Mach64DownloadFromScreen(PixmapPtr pSrc, int x, int y, int w, int h, char *dst, int dst_pitch) { char *src = pSrc->devPrivate.ptr; int src_pitch = exaGetPixmapPitch(pSrc); int bpp = pSrc->drawable.bitsPerPixel; int cpp = (bpp + 7) / 8; int wBytes = w * cpp; exaWaitSync(pSrc->drawable.pScreen); src += (x * cpp) + (y * src_pitch); while (h--) { memcpy(dst, src, wBytes); src += src_pitch; dst += dst_pitch; } return TRUE; } #include "atimach64render.c" /* Compute log base 2 of val. */ static __inline__ int Mach64Log2(int val) { int bits; for (bits = 0; val != 0; val >>= 1, ++bits) ; return bits - 1; } /* * Memory layour for EXA with DRI (no local_textures): * | front | back | depth | textures | pixmaps, xv | c | * * 1024x768@16bpp with 8 MB: * | 1.5 MB | 1.5 MB | 1.5 MB | 0 | ~3.5 MB | c | * * 1024x768@32bpp with 8 MB: * | 3.0 MB | 3.0 MB | 1.5 MB | 0 | ~0.5 MB | c | * * "c" is the hw cursor which occupies 1KB */ static void Mach64SetupMemEXA(ScreenPtr pScreen) { ScrnInfoPtr pScreenInfo = xf86Screens[pScreen->myNum]; ATIPtr pATI = ATIPTR(pScreenInfo); int cpp = (pScreenInfo->bitsPerPixel + 7) / 8; /* front and back buffer */ int bufferSize = pScreenInfo->virtualY * pScreenInfo->displayWidth * cpp; /* always 16-bit z-buffer */ int depthSize = pScreenInfo->virtualY * pScreenInfo->displayWidth * 2; ExaDriverPtr pExa = pATI->pExa; pExa->memoryBase = pATI->pMemory; pExa->memorySize = pScreenInfo->videoRam * 1024; pExa->offScreenBase = bufferSize; #ifdef XF86DRI_DEVEL if (pATI->directRenderingEnabled) { ATIDRIServerInfoPtr pATIDRIServer = pATI->pDRIServerInfo; Bool is_pci = pATIDRIServer->IsPCI; int textureSize = 0; int pixmapCache = 0; int next = 0; /* front buffer */ pATIDRIServer->frontOffset = 0; pATIDRIServer->frontPitch = pScreenInfo->displayWidth; next += bufferSize; /* back buffer */ pATIDRIServer->backOffset = next; pATIDRIServer->backPitch = pScreenInfo->displayWidth; next += bufferSize; /* depth buffer */ pATIDRIServer->depthOffset = next; pATIDRIServer->depthPitch = pScreenInfo->displayWidth; next += depthSize; /* ATIScreenInit does check for the this condition. */ if (next > pExa->memorySize) { xf86DrvMsg(pScreen->myNum, X_WARNING, "DRI static buffer allocation failed, disabling DRI --" "need at least %d kB video memory\n", next / 1024 ); ATIDRICloseScreen(pScreen); pATI->directRenderingEnabled = FALSE; } /* local textures */ /* Reserve approx. half of offscreen memory for local textures */ textureSize = (pExa->memorySize - next) / 2; /* In case DRI requires more offscreen memory than available, * should not happen as ATIScreenInit would have not enabled DRI */ if (textureSize < 0) textureSize = 0; /* Try for enough pixmap cache for a full viewport */ pixmapCache = (pExa->memorySize - next) - textureSize; if (pixmapCache < bufferSize) textureSize = 0; /* Don't allocate a local texture heap for AGP unless requested */ if ( !is_pci && !pATI->OptionLocalTextures ) textureSize = 0; if (textureSize > 0) { int l = Mach64Log2(textureSize / MACH64_NR_TEX_REGIONS); if (l < MACH64_LOG_TEX_GRANULARITY) l = MACH64_LOG_TEX_GRANULARITY; pATIDRIServer->logTextureGranularity = l; /* Round the texture size down to the nearest whole number of * texture regions. */ textureSize = (textureSize >> l) << l; } /* Set a minimum usable local texture heap size. This will fit * two 256x256 textures. We check this after any rounding of * the texture area. */ if (textureSize < 256*256 * cpp * 2) textureSize = 0; /* Disable DRI for PCI if cannot allocate a local texture heap */ if ( is_pci && textureSize == 0 ) { xf86DrvMsg(pScreen->myNum, X_WARNING, "Not enough memory for local textures, disabling DRI\n"); ATIDRICloseScreen(pScreen); pATI->directRenderingEnabled = FALSE; } pATIDRIServer->textureOffset = next; pATIDRIServer->textureSize = textureSize; next += textureSize; if (pATI->directRenderingEnabled) pExa->offScreenBase = next; } #endif /* XF86DRI_DEVEL */ xf86DrvMsg(pScreen->myNum, X_INFO, "EXA memory management initialized\n" "\t base : %10p\n" "\t offscreen: +%10lx\n" "\t size : +%10lx\n" "\t cursor : %10p\n", pExa->memoryBase, pExa->offScreenBase, pExa->memorySize, pATI->pCursorImage); if (TRUE || xf86GetVerbosity() > 1) { int offscreen = pExa->memorySize - pExa->offScreenBase; int viewport = bufferSize; int dvdframe = 720*480*cpp; /* enough for single-buffered DVD */ xf86DrvMsg(pScreen->myNum, X_INFO, "Will use %d kB of offscreen memory for EXA\n" "\t\t or %5.2f viewports (composite)\n" "\t\t or %5.2f dvdframes (xvideo)\n", offscreen / 1024, 1.0 * offscreen / viewport, 1.0 * offscreen / dvdframe); } #ifdef XF86DRI_DEVEL if (pATI->directRenderingEnabled) { ATIDRIServerInfoPtr pATIDRIServer = pATI->pDRIServerInfo; xf86DrvMsg(pScreen->myNum, X_INFO, "Will use back buffer at offset 0x%x\n", pATIDRIServer->backOffset); xf86DrvMsg(pScreen->myNum, X_INFO, "Will use depth buffer at offset 0x%x\n", pATIDRIServer->depthOffset); if (pATIDRIServer->textureSize > 0) { xf86DrvMsg(pScreen->myNum, X_INFO, "Will use %d kB for local textures at offset 0x%x\n", pATIDRIServer->textureSize/1024, pATIDRIServer->textureOffset); } } #endif /* XF86DRI_DEVEL */ pExa->pixmapOffsetAlign = 64; pExa->pixmapPitchAlign = 64; pExa->flags = EXA_OFFSCREEN_PIXMAPS; pExa->maxX = ATIMach64MaxX; pExa->maxY = ATIMach64MaxY; } Bool ATIMach64ExaInit(ScreenPtr pScreen) { ScrnInfoPtr pScreenInfo = xf86Screens[pScreen->myNum]; ATIPtr pATI = ATIPTR(pScreenInfo); ExaDriverPtr pExa; /* FIXME: which chips support EXA ? */ if (pATI->Chip < ATI_CHIP_264CT) { xf86DrvMsg(pScreenInfo->scrnIndex, X_ERROR, "EXA is not supported for ATI chips earlier than " "the ATI Mach64.\n"); return FALSE; } pExa = exaDriverAlloc(); if (!pExa) return FALSE; pATI->pExa = pExa; pExa->exa_major = 2; pExa->exa_minor = 0; Mach64SetupMemEXA(pScreen); pExa->WaitMarker = Mach64WaitMarker; pExa->PrepareSolid = Mach64PrepareSolid; pExa->Solid = Mach64Solid; pExa->DoneSolid = Mach64DoneSolid; pExa->PrepareCopy = Mach64PrepareCopy; pExa->Copy = Mach64Copy; pExa->DoneCopy = Mach64DoneCopy; /* EXA hits more optimized paths when it does not have to fallback because * of missing UTS/DFS, hook memcpy-based UTS/DFS. */ pExa->UploadToScreen = Mach64UploadToScreen; pExa->DownloadFromScreen = Mach64DownloadFromScreen; if (pATI->RenderAccelEnabled) { if (pATI->Chip >= ATI_CHIP_264GTPRO) { /* 3D Rage Pro does not support NPOT textures. */ pExa->flags |= EXA_OFFSCREEN_ALIGN_POT; pExa->CheckComposite = Mach64CheckComposite; pExa->PrepareComposite = Mach64PrepareComposite; pExa->Composite = Mach64Composite; pExa->DoneComposite = Mach64DoneComposite; } else { xf86DrvMsg(pScreen->myNum, X_INFO, "Render acceleration is not supported for ATI chips " "earlier than the ATI 3D Rage Pro.\n"); pATI->RenderAccelEnabled = FALSE; } } xf86DrvMsg(pScreen->myNum, X_INFO, "Render acceleration %s\n", pATI->RenderAccelEnabled ? "enabled" : "disabled"); if (!exaDriverInit(pScreen, pATI->pExa)) { xfree(pATI->pExa); pATI->pExa = NULL; return FALSE; } return TRUE; } #endif